diff --git a/.github/workflows/dev-mkl-vtk.yml b/.github/workflows/dev-mkl-vtk.yml index e9ca7907e..246299c39 100644 --- a/.github/workflows/dev-mkl-vtk.yml +++ b/.github/workflows/dev-mkl-vtk.yml @@ -32,8 +32,8 @@ jobs: ./suanPan-linux-mkl-vtk/bin/suanPan -v - uses: actions/upload-artifact@v3 with: - name: suanPan-2.6.1-1.x86_64.deb - path: suanPan-2.6.1-1.x86_64.deb + name: suanPan-2.7.0-1.x86_64.deb + path: suanPan-2.7.0-1.x86_64.deb fedora-mkl-vtk-dev: runs-on: ubuntu-22.04 container: fedora:35 @@ -66,10 +66,10 @@ jobs: make package -j"$(nproc)" - name: Test run: | - dnf install ./suanPan-2.6.1-1.x86_64.rpm -y + dnf install ./suanPan-2.7.0-1.x86_64.rpm -y suanPan -v - name: Upload uses: actions/upload-artifact@v3 with: - name: suanPan-2.6.1-1.x86_64.rpm - path: suanPan-2.6.1-1.x86_64.rpm + name: suanPan-2.7.0-1.x86_64.rpm + path: suanPan-2.7.0-1.x86_64.rpm diff --git a/.github/workflows/dev-vtk.yml b/.github/workflows/dev-vtk.yml index 515e078b5..3d4489c86 100644 --- a/.github/workflows/dev-vtk.yml +++ b/.github/workflows/dev-vtk.yml @@ -29,8 +29,8 @@ jobs: ./suanPan-linux-openblas-vtk/bin/suanPan -v - uses: actions/upload-artifact@v3 with: - name: suanPan-2.6.1-1.x86_64.deb - path: suanPan-2.6.1-1.x86_64.deb + name: suanPan-2.7.0-1.x86_64.deb + path: suanPan-2.7.0-1.x86_64.deb macos-openblas-vtk-dev: runs-on: macos-11 steps: @@ -53,3 +53,29 @@ jobs: run: | export DYLD_LIBRARY_PATH=/Users/runner/work/suanPan/suanPan/suanPan-macos-openblas-vtk/lib/ ./suanPan-macos-openblas-vtk/bin/suanPan -v + windows-openblas-vtk-dev: + runs-on: windows-2022 + steps: + - name: Clone + uses: actions/checkout@v3 + - name: Install VTK + run: | + C:/msys64/usr/bin/wget.exe https://github.com/TLCFEM/prebuilds/releases/download/latest/VTK-9.2.2-win.7z + 7z x VTK-9.2.2-win.7z + - name: Golang + uses: actions/setup-go@v3 + - name: Compile + run: | + go build Checker/updater.go + cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_MULTITHREAD=ON -DUSE_HDF5=ON -DUSE_EXTERNAL_VTK=ON -DVTK_DIR=D:/a/suanPan/suanPan/lib/cmake/vtk-9.2/ -DCMAKE_INSTALL_PREFIX=suanPan-win-openblas-vtk . + cmake --build . --target INSTALL --config Release + - name: Pack + run: | + cp updater.exe suanPan-win-openblas-vtk/bin + cd suanPan-win-openblas-vtk/bin + 7z a ../../suanPan-win-openblas-vtk.7z ./* + ./suanPan.exe -v + - uses: actions/upload-artifact@v3 + with: + name: suanPan-win-openblas-vtk.7z + path: suanPan-win-openblas-vtk.7z diff --git a/.github/workflows/master-mkl-vtk.yml b/.github/workflows/master-mkl-vtk.yml index 8c320f9d9..5549ebb6b 100644 --- a/.github/workflows/master-mkl-vtk.yml +++ b/.github/workflows/master-mkl-vtk.yml @@ -32,7 +32,7 @@ jobs: tar czf suanPan-linux-mkl-vtk.tar.gz suanPan-linux-mkl-vtk - name: Test run: | - sudo apt-get install ./suanPan-2.6.1-1.x86_64.deb -y + sudo apt-get install ./suanPan-2.7.0-1.x86_64.deb -y suanPan -v export LD_LIBRARY_PATH=/home/runner/work/suanPan/suanPan/suanPan-linux-mkl-vtk/lib cd suanPan-linux-mkl-vtk/bin @@ -50,8 +50,8 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: suanPan-2.6.1-1.x86_64.deb - asset_name: suanPan-2.6.1-1.x86_64.deb + file: suanPan-2.7.0-1.x86_64.deb + asset_name: suanPan-2.7.0-1.x86_64.deb tag: ${{ env.SP_TIME }} prerelease: true overwrite: true @@ -87,14 +87,14 @@ jobs: make package -j"$(nproc)" - name: Test run: | - dnf install ./suanPan-2.6.1-1.x86_64.rpm -y + dnf install ./suanPan-2.7.0-1.x86_64.rpm -y suanPan -v - name: RPM uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: suanPan-2.6.1-1.x86_64.rpm - asset_name: suanPan-2.6.1-1.x86_64.rpm + file: suanPan-2.7.0-1.x86_64.rpm + asset_name: suanPan-2.7.0-1.x86_64.rpm tag: ${{ env.SP_TIME }} prerelease: true overwrite: true diff --git a/CHANGELOG.md b/CHANGELOG.md index b5b37ebd8..c331c24e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ 1. `MKL` includes outdated `FEAST`, the external names in `FEAST` library are modified to avoid linking error. 2. `OpenBLAS` causes SEGFAULT with version 0.3.15+ when compiled with `DYNAMIC_ARCH` enabled. +## version 2.7 + +1. optimise assembling of symmetric global matrices [#79](https://github.com/TLCFEM/suanPan/pull/79) +2. extend `BatheTwoStep` to allow customisation of spectral radius [#81](https://github.com/TLCFEM/suanPan/pull/81) and sub-step size [#82](https://github.com/TLCFEM/suanPan/pull/82) +3. update `Catch2` to version `2.13.10` +4. update `Armadillo` to version `11.4` +5. update modern `Arpack` [#94](https://github.com/TLCFEM/suanPan/pull/94) +6. add `Tchamwa` [#88](https://github.com/TLCFEM/suanPan/pull/88), `BatheExplicit` [#90](https://github.com/TLCFEM/suanPan/pull/90) and `GeneralisedAlphaExplicit` [#93](https://github.com/TLCFEM/suanPan/pull/93) explicit time integration methods +7. add `OALTS` two-step implicit time integration method [#92](https://github.com/TLCFEM/suanPan/pull/92) +8. add `Sinh1D` and `Tanh1D` nonlinear elastic 1D material +9. add `linear_system` flag to speed up linear system analysis + ## version 2.6.1 1. add `-nu` (`--noupdate`) flag to skip check of new version on startup diff --git a/CITATION.cff b/CITATION.cff index b3b15ee68..cd65e6600 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -28,5 +28,5 @@ keywords: - IGA - Solid Mechanics license: GPL-3.0 -version: '2.5' -date-released: '2022-08-13' +version: '2.6.1' +date-released: '2022-11-09' diff --git a/CMakeLists.txt b/CMakeLists.txt index 77f8cc8d2..97d3088a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,14 +79,13 @@ if (FORTRAN_STATUS) add_subdirectory(Toolbox/amd-src) add_subdirectory(Toolbox/arpack-src) add_subdirectory(Toolbox/feast-src) - add_subdirectory(Toolbox/lapack-ext) add_subdirectory(Toolbox/mumps-src) add_subdirectory(Toolbox/fext) - target_link_libraries(${PROJECT_NAME} amd arpack spmm mumps feast) - message("Linking additional amd arpack spmm mumps feast") + target_link_libraries(${PROJECT_NAME} amd arpack mumps feast) + message("Linking additional amd arpack mumps feast") elseif (COMPILER_IDENTIFIER MATCHES "vs") target_link_libraries(${PROJECT_NAME} libfext) - message("Linking precompiled fext (packed with amd arpack spmm mumps feast)") + message("Linking precompiled fext (packed with amd arpack mumps feast)") else () message(FATAL_ERROR "Please install a valid FORTRAN compiler.") endif () @@ -346,7 +345,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux") set(CPACK_PACKAGE_ICON ${ROOT}/Resource/suanPan-ua.svg) set(CPACK_PACKAGE_RELEASE 1) set(CPACK_PACKAGE_VENDOR "tlcfem") - set(CPACK_PACKAGE_VERSION "2.6.1") + set(CPACK_PACKAGE_VERSION "2.7.0") set(CPACK_PACKAGE_DESCRIPTION "An Open Source, Parallel and Heterogeneous Finite Element Analysis Framework") set(CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/TLCFEM/suanPan") diff --git a/Constraint/BC/MultiplierBC.cpp b/Constraint/BC/MultiplierBC.cpp index 25dfd9313..8178f8784 100644 --- a/Constraint/BC/MultiplierBC.cpp +++ b/Constraint/BC/MultiplierBC.cpp @@ -18,6 +18,8 @@ #include "MultiplierBC.h" #include #include +#include +#include /** * \brief method to apply the BC to the system. @@ -31,21 +33,29 @@ int MultiplierBC::process(const shared_ptr& D) { // the container used is concurrently safe D->insert_restrained_dof(dof_encoding = get_nodal_active_dof(D)); - if(auto& t_stiff = W->get_stiffness(); nullptr != t_stiff) { - std::scoped_lock lock(W->get_stiffness_mutex()); - for(const auto I : dof_encoding) t_stiff->unify(I); + if(IntegratorType::Explicit == D->get_current_step()->get_integrator()->type()) { + if(auto& t_mass = W->get_mass(); nullptr != t_mass) { + std::scoped_lock lock(W->get_mass_mutex()); + for(const auto I : dof_encoding) t_mass->unify(I); + } } - if(auto& t_mass = W->get_mass(); nullptr != t_mass) { - std::scoped_lock lock(W->get_mass_mutex()); - for(const auto I : dof_encoding) t_mass->nullify(I); - } - if(auto& t_damping = W->get_damping(); nullptr != t_damping) { - std::scoped_lock lock(W->get_damping_mutex()); - for(const auto I : dof_encoding) t_damping->nullify(I); - } - if(auto& t_geometry = W->get_geometry(); nullptr != t_geometry) { - std::scoped_lock lock(W->get_geometry_mutex()); - for(const auto I : dof_encoding) t_geometry->nullify(I); + else { + if(auto& t_stiff = W->get_stiffness(); nullptr != t_stiff) { + std::scoped_lock lock(W->get_stiffness_mutex()); + for(const auto I : dof_encoding) t_stiff->unify(I); + } + if(auto& t_mass = W->get_mass(); nullptr != t_mass) { + std::scoped_lock lock(W->get_mass_mutex()); + for(const auto I : dof_encoding) t_mass->nullify(I); + } + if(auto& t_damping = W->get_damping(); nullptr != t_damping) { + std::scoped_lock lock(W->get_damping_mutex()); + for(const auto I : dof_encoding) t_damping->nullify(I); + } + if(auto& t_geometry = W->get_geometry(); nullptr != t_geometry) { + std::scoped_lock lock(W->get_geometry_mutex()); + for(const auto I : dof_encoding) t_geometry->nullify(I); + } } return SUANPAN_SUCCESS; diff --git a/Constraint/LJPotential2D.h b/Constraint/LJPotential2D.h index af317b42a..ea3775b8b 100644 --- a/Constraint/LJPotential2D.h +++ b/Constraint/LJPotential2D.h @@ -34,6 +34,7 @@ class LJPotential2D final : public ParticleCollision2D { [[nodiscard]] double compute_f(double) const override; [[nodiscard]] double compute_df(double) const override; + public: using ParticleCollision2D::ParticleCollision2D; }; diff --git a/Constraint/LinearSpring2D.h b/Constraint/LinearSpring2D.h index a4a4de289..e861e64b5 100644 --- a/Constraint/LinearSpring2D.h +++ b/Constraint/LinearSpring2D.h @@ -34,6 +34,7 @@ class LinearSpring2D final : public ParticleCollision2D { [[nodiscard]] double compute_f(double) const override; [[nodiscard]] double compute_df(double) const override; + public: using ParticleCollision2D::ParticleCollision2D; }; diff --git a/Constraint/RestitutionWallPenalty.cpp b/Constraint/RestitutionWallPenalty.cpp index d6171e6be..11bb3816c 100644 --- a/Constraint/RestitutionWallPenalty.cpp +++ b/Constraint/RestitutionWallPenalty.cpp @@ -64,7 +64,6 @@ int RestitutionWallPenalty::process(const shared_ptr& D) { resistance.zeros(W->get_size()); auto counter = 0llu; for(const auto& I : node_pool) { - if(dot(current_velocity_handler(I), outer_norm) > 0.) continue; const auto c_vel = current_velocity_handler(I); if(dot(c_vel, outer_norm) > 0.) continue; auto& t_dof = I->get_reordered_dof(); @@ -85,7 +84,7 @@ int RestitutionWallPenalty::process(const shared_ptr& D) { return SUANPAN_SUCCESS; } -int RestitutionWallPenalty::stage(const shared_ptr& D) { +void RestitutionWallPenalty::stage(const shared_ptr& D) { auto& W = D->get_factory(); auto trial_acceleration = get_trial_acceleration(W); @@ -97,8 +96,6 @@ int RestitutionWallPenalty::stage(const shared_ptr& D) { } W->update_trial_acceleration(trial_acceleration); - - return SUANPAN_SUCCESS; } void RestitutionWallPenalty::commit_status() { node_pool.clear(); } diff --git a/Constraint/RestitutionWallPenalty.h b/Constraint/RestitutionWallPenalty.h index b4b55731f..272acab89 100644 --- a/Constraint/RestitutionWallPenalty.h +++ b/Constraint/RestitutionWallPenalty.h @@ -38,6 +38,7 @@ class RestitutionWallPenalty : public RigidWallPenalty { suanpan::set> node_pool; const double restitution_coefficient; + public: RestitutionWallPenalty(unsigned, unsigned, unsigned, vec&&, vec&&, double, double, unsigned); RestitutionWallPenalty(unsigned, unsigned, unsigned, vec&&, vec&&, vec&&, double, double, unsigned); @@ -46,7 +47,7 @@ class RestitutionWallPenalty : public RigidWallPenalty { int process(const shared_ptr&) override; - int stage(const shared_ptr&) override; + void stage(const shared_ptr&) override; void commit_status() override; void clear_status() override; diff --git a/Constraint/RigidWallPenalty.h b/Constraint/RigidWallPenalty.h index b0a14b2c1..53aa042a7 100644 --- a/Constraint/RigidWallPenalty.h +++ b/Constraint/RigidWallPenalty.h @@ -51,6 +51,7 @@ class RigidWallPenalty : public Constraint { Col (*trial_displacement_handler)(const shared_ptr&) = nullptr; Col (*trial_velocity_handler)(const shared_ptr&) = nullptr; Col (*trial_acceleration_handler)(const shared_ptr&) = nullptr; + public: RigidWallPenalty(unsigned, unsigned, unsigned, vec&&, vec&&, double, unsigned); RigidWallPenalty(unsigned, unsigned, unsigned, vec&&, vec&&, vec&&, double, unsigned); diff --git a/Converger/AbsDisp.cpp b/Converger/AbsDisp.cpp index fd0b3a2c4..9a38e0f17 100644 --- a/Converger/AbsDisp.cpp +++ b/Converger/AbsDisp.cpp @@ -20,7 +20,7 @@ #include /** - * \brief the complete constructor. + * \brief The complete constructor. * \param T `unique_tag` * \param E `tolerance` * \param M `max_iteration` @@ -31,8 +31,10 @@ AbsDisp::AbsDisp(const unsigned T, const double E, const unsigned M, const bool unique_ptr AbsDisp::get_copy() { return make_unique(*this); } -bool AbsDisp::is_converged() { - set_error(norm(get_domain().lock()->get_factory()->get_incre_displacement())); +bool AbsDisp::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); + + set_error(norm(W->get_incre_displacement() + W->get_ninja()) / static_cast(W->get_size())); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("absolute displacement error: %.5E.\n", get_error()); diff --git a/Converger/AbsDisp.h b/Converger/AbsDisp.h index e7ca49c38..aba9156e2 100644 --- a/Converger/AbsDisp.h +++ b/Converger/AbsDisp.h @@ -44,7 +44,7 @@ class AbsDisp final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/AbsError.cpp b/Converger/AbsError.cpp index d0d7e5686..e2755d38e 100644 --- a/Converger/AbsError.cpp +++ b/Converger/AbsError.cpp @@ -35,8 +35,10 @@ unique_ptr AbsError::get_copy() { return make_unique(*this) * \brief Method to return `conv_flag`. * \return `conv_flag` */ -bool AbsError::is_converged() { - set_error(get_domain().lock()->get_factory()->get_error()); +bool AbsError::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); + + set_error(fabs(W->get_error()) / static_cast(W->get_size())); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("absolute error: %.5E.\n", get_error()); diff --git a/Converger/AbsError.h b/Converger/AbsError.h index 5f0e24502..beeb2b987 100644 --- a/Converger/AbsError.h +++ b/Converger/AbsError.h @@ -45,7 +45,7 @@ class AbsError final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/AbsIncreAcc.cpp b/Converger/AbsIncreAcc.cpp new file mode 100644 index 000000000..f9d9c1ab6 --- /dev/null +++ b/Converger/AbsIncreAcc.cpp @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "AbsIncreAcc.h" +#include +#include + +/** + * \brief The default constructor. + * \param T `unique_tag` + * \param E `tolerance` + * \param M `max_iteration` + * \param P `print_flag` + */ +AbsIncreAcc::AbsIncreAcc(const unsigned T, const double E, const unsigned M, const bool P) + : Converger(T, E, M, P) {} + +unique_ptr AbsIncreAcc::get_copy() { return make_unique(*this); } + +/** + * \brief Method to return `conv_flag`. + * \return `conv_flag` + */ +bool AbsIncreAcc::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); + + set_error(norm(W->get_ninja()) / static_cast(W->get_size())); + set_conv_flag(get_tolerance() > get_error()); + + if(is_print()) suanpan_info("absolute incremental acceleration error: %.5E.\n", get_error()); + + return get_conv_flag(); +} diff --git a/Converger/AbsIncreAcc.h b/Converger/AbsIncreAcc.h new file mode 100644 index 000000000..b77c73e53 --- /dev/null +++ b/Converger/AbsIncreAcc.h @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class AbsIncreAcc + * @brief The AbsIncreAcc class that handles converger test to indicate if the + * iteration converges. + * @author tlc + * @date 03/12/2022 + * @version 0.1.0 + * @file AbsIncreAcc.h + * @addtogroup Converger + * @{ + */ + +#ifndef ABSINCREACC_H +#define ABSINCREACC_H + +#include "Converger.h" + +class AbsIncreAcc final : public Converger { +public: + explicit AbsIncreAcc(unsigned = 0, double = 1E-8, unsigned = 7, bool = false); + + unique_ptr get_copy() override; + + bool is_converged(unsigned) override; +}; + +#endif + +//! @} diff --git a/Converger/AbsIncreDisp.cpp b/Converger/AbsIncreDisp.cpp index d8e12881a..748031fe3 100644 --- a/Converger/AbsIncreDisp.cpp +++ b/Converger/AbsIncreDisp.cpp @@ -35,9 +35,10 @@ unique_ptr AbsIncreDisp::get_copy() { return make_uniqueget_factory(); - set_error(norm(W->get_ninja()) / static_cast(W->get_ninja().n_elem)); + + set_error(norm(W->get_ninja()) / static_cast(W->get_size())); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("absolute incremental displacement error: %.5E.\n", get_error()); diff --git a/Converger/AbsIncreDisp.h b/Converger/AbsIncreDisp.h index 28504462d..16f35657c 100644 --- a/Converger/AbsIncreDisp.h +++ b/Converger/AbsIncreDisp.h @@ -37,7 +37,7 @@ class AbsIncreDisp final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/AbsIncreEnergy.cpp b/Converger/AbsIncreEnergy.cpp index 302399fbf..7ae326a85 100644 --- a/Converger/AbsIncreEnergy.cpp +++ b/Converger/AbsIncreEnergy.cpp @@ -31,12 +31,10 @@ AbsIncreEnergy::AbsIncreEnergy(const unsigned T, const double E, const unsigned unique_ptr AbsIncreEnergy::get_copy() { return make_unique(*this); } -bool AbsIncreEnergy::is_converged() { - const auto& D = get_domain().lock(); - - if(auto& W = D->get_factory(); W->get_reference_load().is_empty() || W->get_trial_load_factor().is_empty()) set_error(fabs(dot(W->get_ninja(), W->get_trial_load() - W->get_sushi())) / static_cast(W->get_ninja().n_elem)); - else set_error(fabs(dot(W->get_ninja(), W->get_reference_load() * W->get_trial_load_factor() + W->get_trial_load() - W->get_sushi())) / static_cast(W->get_ninja().n_elem)); +bool AbsIncreEnergy::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); + set_error(fabs(dot(W->get_ninja(), get_residual())) / static_cast(W->get_size())); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("absolute energy increment error: %.5E.\n", get_error()); diff --git a/Converger/AbsIncreEnergy.h b/Converger/AbsIncreEnergy.h index ef00347c0..b59a975ec 100644 --- a/Converger/AbsIncreEnergy.h +++ b/Converger/AbsIncreEnergy.h @@ -44,7 +44,7 @@ class AbsIncreEnergy final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/AbsResidual.cpp b/Converger/AbsResidual.cpp index 5d6eaad0a..980271225 100644 --- a/Converger/AbsResidual.cpp +++ b/Converger/AbsResidual.cpp @@ -24,17 +24,10 @@ AbsResidual::AbsResidual(const unsigned T, const double E, const unsigned M, con unique_ptr AbsResidual::get_copy() { return make_unique(*this); } -bool AbsResidual::is_converged() { - const auto& D = get_domain().lock(); - auto& W = D->get_factory(); - - vec residual = W->get_trial_load() - W->get_trial_resistance(); - if(!W->get_reference_load().is_empty() && !W->get_trial_load_factor().is_empty()) residual += W->get_reference_load() * W->get_trial_load_factor(); - - for(const auto& t_dof : D->get_restrained_dof()) residual(t_dof) = 0.; - - set_error(norm(residual)); +bool AbsResidual::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); + set_error(norm(get_residual()) / static_cast(W->get_size())); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("absolute residual: %.5E.\n", get_error()); diff --git a/Converger/AbsResidual.h b/Converger/AbsResidual.h index bceb5d92b..3154c6eb7 100644 --- a/Converger/AbsResidual.h +++ b/Converger/AbsResidual.h @@ -47,7 +47,7 @@ class AbsResidual final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/CMakeLists.txt b/Converger/CMakeLists.txt index ebfe2f190..1bb497ad2 100644 --- a/Converger/CMakeLists.txt +++ b/Converger/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(${PROJECT_NAME} STATIC AbsDisp.cpp AbsError.cpp AbsIncreDisp.cpp + AbsIncreAcc.cpp AbsIncreEnergy.cpp AbsResidual.cpp Converger.cpp @@ -17,6 +18,7 @@ add_library(${PROJECT_NAME} STATIC RelDisp.cpp RelError.cpp RelIncreDisp.cpp + RelIncreAcc.cpp RelIncreEnergy.cpp RelResidual.cpp ) diff --git a/Converger/Converger b/Converger/Converger index d9b356006..c19a286dd 100644 --- a/Converger/Converger +++ b/Converger/Converger @@ -3,6 +3,7 @@ #include "AbsDisp.h" #include "AbsError.h" #include "AbsIncreDisp.h" +#include "AbsIncreAcc.h" #include "AbsIncreEnergy.h" #include "AbsResidual.h" #include "FixedNumber.h" @@ -10,5 +11,6 @@ #include "RelDisp.h" #include "RelError.h" #include "RelIncreDisp.h" +#include "RelIncreAcc.h" #include "RelIncreEnergy.h" #include "RelResidual.h" \ No newline at end of file diff --git a/Converger/Converger.cpp b/Converger/Converger.cpp index e8c275ed7..6ddb9797a 100644 --- a/Converger/Converger.cpp +++ b/Converger/Converger.cpp @@ -17,6 +17,7 @@ #include "Converger.h" #include +#include /** * \brief the complete constructor. @@ -92,6 +93,18 @@ void Converger::set_conv_flag(const bool C) { conv_flag = C; } */ bool Converger::get_conv_flag() const { return conv_flag; } +vec Converger::get_residual() const { + const auto& D = get_domain().lock(); + auto& W = D->get_factory(); + + vec residual = W->get_trial_load() - W->get_sushi(); + if(!W->get_reference_load().is_empty() && !W->get_trial_load_factor().is_empty()) residual += W->get_reference_load() * W->get_trial_load_factor(); + + for(const auto& t_dof : D->get_restrained_dof()) residual(t_dof) = 0.; + + return residual; +} + /** * \brief method to return `print_flag`. * \return `print_flag` diff --git a/Converger/Converger.h b/Converger/Converger.h index 19683a6d5..ade6a918e 100644 --- a/Converger/Converger.h +++ b/Converger/Converger.h @@ -54,6 +54,8 @@ class Converger : public Tag { bool conv_flag = false; /**< converger flag */ protected: + [[nodiscard]] vec get_residual() const; + [[nodiscard]] bool is_print() const; public: @@ -83,7 +85,7 @@ class Converger : public Tag { virtual void set_conv_flag(bool); [[nodiscard]] bool get_conv_flag() const; - virtual bool is_converged() = 0; + virtual bool is_converged(unsigned) = 0; }; #endif diff --git a/Converger/ConvergerParser.cpp b/Converger/ConvergerParser.cpp index 543980d1a..858da67a7 100644 --- a/Converger/ConvergerParser.cpp +++ b/Converger/ConvergerParser.cpp @@ -70,6 +70,8 @@ int create_new_converger(const shared_ptr& domain, istringstream& co else if(is_equal(converger_id, "RelResidual") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; else if(is_equal(converger_id, "AbsIncreDisp") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; else if(is_equal(converger_id, "RelIncreDisp") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; + else if(is_equal(converger_id, "AbsIncreAcc") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; + else if(is_equal(converger_id, "RelIncreAcc") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; else if(is_equal(converger_id, "AbsDisp") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; else if(is_equal(converger_id, "RelDisp") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; else if(is_equal(converger_id, "AbsError") && domain->insert(make_shared(tag, tolerance, max_iteration, is_true(print_flag)))) code = 1; diff --git a/Converger/FixedNumber.cpp b/Converger/FixedNumber.cpp index 84b75ce7d..5efaa2ed3 100644 --- a/Converger/FixedNumber.cpp +++ b/Converger/FixedNumber.cpp @@ -17,11 +17,6 @@ #include "FixedNumber.h" -void FixedNumber::set_conv_flag(const bool F) { - if(F) counter = 0; - Converger::set_conv_flag(F); -} - /** * \brief the complete constructor. * \param T `unique_tag` @@ -33,9 +28,7 @@ FixedNumber::FixedNumber(const unsigned T, const unsigned M, const bool P) unique_ptr FixedNumber::get_copy() { return make_unique(*this); } -bool FixedNumber::is_converged() { - ++counter; - +bool FixedNumber::is_converged(const unsigned counter) { if(is_print()) suanpan_info("iteration counter: %u.\n", counter); set_conv_flag(get_max_iteration() <= counter); diff --git a/Converger/FixedNumber.h b/Converger/FixedNumber.h index b7d6502ef..c04a44abc 100644 --- a/Converger/FixedNumber.h +++ b/Converger/FixedNumber.h @@ -33,17 +33,12 @@ #include "Converger.h" class FixedNumber final : public Converger { - unsigned counter = 0; - -protected: - void set_conv_flag(bool) override; - public: explicit FixedNumber(unsigned = 0, unsigned = 7, bool = false); unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/LogicConverger.cpp b/Converger/LogicConverger.cpp index f87dded76..ea9c2aec1 100644 --- a/Converger/LogicConverger.cpp +++ b/Converger/LogicConverger.cpp @@ -51,9 +51,9 @@ int LogicConverger::initialize() { unique_ptr LogicAND::get_copy() { return make_unique(*this); } -bool LogicAND::is_converged() { - auto result_a = std::async([&] { return converger_a->is_converged(); }); - auto result_b = std::async([&] { return converger_b->is_converged(); }); +bool LogicAND::is_converged(const unsigned counter) { + auto result_a = std::async([&] { return converger_a->is_converged(counter); }); + auto result_b = std::async([&] { return converger_b->is_converged(counter); }); const auto logic_result = result_a.get() && result_b.get(); set_conv_flag(logic_result); @@ -65,9 +65,9 @@ bool LogicAND::is_converged() { unique_ptr LogicOR::get_copy() { return make_unique(*this); } -bool LogicOR::is_converged() { - auto result_a = std::async([&] { return converger_a->is_converged(); }); - auto result_b = std::async([&] { return converger_b->is_converged(); }); +bool LogicOR::is_converged(const unsigned counter) { + auto result_a = std::async([&] { return converger_a->is_converged(counter); }); + auto result_b = std::async([&] { return converger_b->is_converged(counter); }); const auto logic_result = result_a.get() || result_b.get(); set_conv_flag(logic_result); @@ -79,9 +79,9 @@ bool LogicOR::is_converged() { unique_ptr LogicXOR::get_copy() { return make_unique(*this); } -bool LogicXOR::is_converged() { - auto result_a = std::async([&] { return converger_a->is_converged(); }); - auto result_b = std::async([&] { return converger_b->is_converged(); }); +bool LogicXOR::is_converged(const unsigned counter) { + auto result_a = std::async([&] { return converger_a->is_converged(counter); }); + auto result_b = std::async([&] { return converger_b->is_converged(counter); }); const auto logic_result = result_a.get() != result_b.get(); set_conv_flag(logic_result); diff --git a/Converger/LogicConverger.h b/Converger/LogicConverger.h index 05ad5770c..499849421 100644 --- a/Converger/LogicConverger.h +++ b/Converger/LogicConverger.h @@ -49,7 +49,7 @@ class LogicAND final : public LogicConverger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; class LogicOR final : public LogicConverger { @@ -58,7 +58,7 @@ class LogicOR final : public LogicConverger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; class LogicXOR final : public LogicConverger { @@ -67,7 +67,7 @@ class LogicXOR final : public LogicConverger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/RelDisp.cpp b/Converger/RelDisp.cpp index ec4741dab..44e029578 100644 --- a/Converger/RelDisp.cpp +++ b/Converger/RelDisp.cpp @@ -24,10 +24,11 @@ RelDisp::RelDisp(const unsigned T, const double E, const unsigned M, const bool unique_ptr RelDisp::get_copy() { return make_unique(*this); } -bool RelDisp::is_converged() { - const auto& t_factory = get_domain().lock()->get_factory(); +bool RelDisp::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); - set_error(norm(t_factory->get_incre_displacement() / t_factory->get_trial_displacement())); + const auto rel_disp = norm(W->get_incre_displacement() + W->get_ninja()) / norm(W->get_trial_displacement()); + set_error(std::isfinite(rel_disp) ? rel_disp : 1.); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("relative displacement error: %.5E.\n", get_error()); diff --git a/Converger/RelDisp.h b/Converger/RelDisp.h index 17f33ef06..2c04b947c 100644 --- a/Converger/RelDisp.h +++ b/Converger/RelDisp.h @@ -37,7 +37,7 @@ class RelDisp final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/RelError.cpp b/Converger/RelError.cpp index a029ee0a1..af37abb15 100644 --- a/Converger/RelError.cpp +++ b/Converger/RelError.cpp @@ -35,10 +35,11 @@ unique_ptr RelError::get_copy() { return make_unique(*this) * \brief Method to return `conv_flag`. * \return `conv_flag` */ -bool RelError::is_converged() { - const auto& t_factory = get_domain().lock()->get_factory(); +bool RelError::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); - set_error(t_factory->get_error() / norm(t_factory->get_trial_displacement())); + const auto rel_error = fabs(W->get_error()) / norm(W->get_trial_displacement()); + set_error(std::isfinite(rel_error) ? rel_error : 1.); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("relative error: %.5E.\n", get_error()); diff --git a/Converger/RelError.h b/Converger/RelError.h index a6d0c21c5..e25d41d48 100644 --- a/Converger/RelError.h +++ b/Converger/RelError.h @@ -37,7 +37,7 @@ class RelError final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/RelIncreAcc.cpp b/Converger/RelIncreAcc.cpp new file mode 100644 index 000000000..a7c59621c --- /dev/null +++ b/Converger/RelIncreAcc.cpp @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "RelIncreAcc.h" +#include +#include + +RelIncreAcc::RelIncreAcc(const unsigned T, const double E, const unsigned M, const bool P) + : Converger(T, E, M, P) {} + +unique_ptr RelIncreAcc::get_copy() { return make_unique(*this); } + +bool RelIncreAcc::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); + + const auto rel_incre_disp = norm(W->get_ninja()) / norm(W->get_incre_acceleration() + W->get_ninja()); + set_error(std::isfinite(rel_incre_disp) ? rel_incre_disp : 1.); + set_conv_flag(get_tolerance() > get_error()); + + if(is_print()) suanpan_info("relative incremental acceleration error: %.5E.\n", get_error()); + + return get_conv_flag(); +} diff --git a/Converger/RelIncreAcc.h b/Converger/RelIncreAcc.h new file mode 100644 index 000000000..8661cf59a --- /dev/null +++ b/Converger/RelIncreAcc.h @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class RelIncreAcc + * @brief The RelIncreAcc class that handles converger test to indicate if the + * iteration converges. + * @author tlc + * @date 03/12/2022 + * @version 0.1.0 + * @file RelIncreAcc.h + * @addtogroup Converger + * @{ + */ + +#ifndef RELINCREACC_H +#define RELINCREACC_H + +#include "Converger.h" + +class RelIncreAcc final : public Converger { +public: + explicit RelIncreAcc(unsigned = 0, double = 1E-8, unsigned = 7, bool = false); + + unique_ptr get_copy() override; + + bool is_converged(unsigned) override; +}; + +#endif + +//! @} diff --git a/Converger/RelIncreDisp.cpp b/Converger/RelIncreDisp.cpp index 9e556acd5..aa119a362 100644 --- a/Converger/RelIncreDisp.cpp +++ b/Converger/RelIncreDisp.cpp @@ -24,13 +24,11 @@ RelIncreDisp::RelIncreDisp(const unsigned T, const double E, const unsigned M, c unique_ptr RelIncreDisp::get_copy() { return make_unique(*this); } -bool RelIncreDisp::is_converged() { - auto& t_factory = get_domain().lock()->get_factory(); +bool RelIncreDisp::is_converged(unsigned) { + auto& W = get_domain().lock()->get_factory(); - const auto n_norm = norm(t_factory->get_ninja()); - const auto d_norm = norm(t_factory->get_incre_displacement()); - const auto rel_error = n_norm / d_norm; - set_error(std::isfinite(rel_error) ? rel_error : n_norm); + const auto rel_incre_disp = norm(W->get_ninja()) / norm(W->get_incre_displacement() + W->get_ninja()); + set_error(std::isfinite(rel_incre_disp) ? rel_incre_disp : 1.); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("relative incremental displacement error: %.5E.\n", get_error()); diff --git a/Converger/RelIncreDisp.h b/Converger/RelIncreDisp.h index 63678fa63..f8daffd66 100644 --- a/Converger/RelIncreDisp.h +++ b/Converger/RelIncreDisp.h @@ -37,7 +37,7 @@ class RelIncreDisp final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/RelIncreEnergy.cpp b/Converger/RelIncreEnergy.cpp index b062b3adf..92f802c72 100644 --- a/Converger/RelIncreEnergy.cpp +++ b/Converger/RelIncreEnergy.cpp @@ -31,27 +31,14 @@ RelIncreEnergy::RelIncreEnergy(const unsigned T, const double E, const unsigned unique_ptr RelIncreEnergy::get_copy() { return make_unique(*this); } -bool RelIncreEnergy::is_converged() { +bool RelIncreEnergy::is_converged(const unsigned counter) { const auto& D = get_domain().lock(); auto& W = D->get_factory(); - vec residual = W->get_trial_load() - W->get_sushi(); - - if(!W->get_reference_load().is_empty() && !W->get_trial_load_factor().is_empty()) residual += W->get_reference_load() * W->get_trial_load_factor(); - - for(const auto& t_dof : D->get_restrained_dof()) residual(t_dof) = 0.; - - if(fabs(ref_energy + 1.) < 1E-12) { - ref_energy = fabs(dot(W->get_ninja(), residual)); - set_error(1.); - } - else set_error(fabs(dot(W->get_ninja(), residual)) / ref_energy); - - if(get_tolerance() > get_error()) { - set_conv_flag(true); - ref_energy = -1.; - } - else set_conv_flag(false); + const auto energy = fabs(dot(W->get_ninja(), get_residual())); + if(0u == counter) ref_energy = energy; + set_error(energy / ref_energy); + set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("relative energy increment error: %.5E.\n", get_error()); diff --git a/Converger/RelIncreEnergy.h b/Converger/RelIncreEnergy.h index 2b337d38e..b70e286b9 100644 --- a/Converger/RelIncreEnergy.h +++ b/Converger/RelIncreEnergy.h @@ -46,7 +46,7 @@ class RelIncreEnergy final : public Converger { unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Converger/RelResidual.cpp b/Converger/RelResidual.cpp index 8c1ae2e16..d89733ac3 100644 --- a/Converger/RelResidual.cpp +++ b/Converger/RelResidual.cpp @@ -16,30 +16,16 @@ ******************************************************************************/ #include "RelResidual.h" -#include -#include RelResidual::RelResidual(const unsigned T, const double E, const unsigned M, const bool P) : Converger(T, E, M, P) {} unique_ptr RelResidual::get_copy() { return make_unique(*this); } -bool RelResidual::is_converged() { - const auto& D = get_domain().lock(); - auto& W = D->get_factory(); - - auto residual = W->get_trial_load(); - - if(!W->get_reference_load().is_empty() && !W->get_trial_load_factor().is_empty()) residual += W->get_reference_load() * W->get_trial_load_factor(); - - const auto ref_residual = norm(residual); - - residual -= W->get_trial_resistance(); - - for(const auto& t_dof : D->get_restrained_dof()) residual(t_dof) = 0.; - - set_error(norm(residual) / ref_residual); - +bool RelResidual::is_converged(const unsigned counter) { + const auto residual = norm(get_residual()); + if(0u == counter) ref_residual = residual; + set_error(residual / ref_residual); set_conv_flag(get_tolerance() > get_error()); if(is_print()) suanpan_info("relative residual: %.5E.\n", get_error()); diff --git a/Converger/RelResidual.h b/Converger/RelResidual.h index 938640cc3..a99dcc32f 100644 --- a/Converger/RelResidual.h +++ b/Converger/RelResidual.h @@ -46,12 +46,14 @@ class DomainBase; class RelResidual final : public Converger { + double ref_residual = 0.; + public: explicit RelResidual(unsigned = 0, double = 1E-8, unsigned = 7, bool = false); unique_ptr get_copy() override; - bool is_converged() override; + bool is_converged(unsigned) override; }; #endif diff --git a/Database/HDF.cpp b/Database/HDF.cpp index 03b2b2e58..23dda0652 100644 --- a/Database/HDF.cpp +++ b/Database/HDF.cpp @@ -18,7 +18,7 @@ #ifdef SUANPAN_HDF5 #include "HDF.h" -#include +#include //#include diff --git a/Developer/Modifier/ModifierExample.cpp b/Developer/Modifier/ModifierExample.cpp index b562e995d..d7c2f6192 100644 --- a/Developer/Modifier/ModifierExample.cpp +++ b/Developer/Modifier/ModifierExample.cpp @@ -48,7 +48,7 @@ ModifierExample::ModifierExample(const unsigned T, const double A, const double , b(B) {} int ModifierExample::update_status() { - suanpan_for_each(element_pool.cbegin(), element_pool.cend(), [&](const weak_ptr& ele_ptr) { + suanpan::for_all(element_pool, [&](const weak_ptr& ele_ptr) { if(const auto t_ptr = ele_ptr.lock()) { mat t_damping(t_ptr->get_total_number(), t_ptr->get_total_number(), fill::zeros); if(a != 0. && !t_ptr->get_current_mass().empty()) t_damping += a * t_ptr->get_current_mass(); diff --git a/Domain/ConditionalModifier.cpp b/Domain/ConditionalModifier.cpp index 3642b0fa4..2f92b7df4 100644 --- a/Domain/ConditionalModifier.cpp +++ b/Domain/ConditionalModifier.cpp @@ -76,8 +76,6 @@ int ConditionalModifier::initialize(const shared_ptr& D) { int ConditionalModifier::process_resistance(const shared_ptr& D) { return process(D); } -int ConditionalModifier::stage(const shared_ptr&) { return SUANPAN_SUCCESS; } - const uvec& ConditionalModifier::get_node_encoding() const { return node_encoding; } const uvec& ConditionalModifier::get_dof_encoding() const { return dof_encoding; } diff --git a/Domain/ConditionalModifier.h b/Domain/ConditionalModifier.h index ade8d59ef..a4043d45c 100644 --- a/Domain/ConditionalModifier.h +++ b/Domain/ConditionalModifier.h @@ -96,7 +96,7 @@ class ConditionalModifier : public Tag { * predictor--corrector type algorithms. This method is called before committing trial status to perform * necessary operations. */ - virtual int stage(const shared_ptr&); + virtual void stage(const shared_ptr&) {} [[nodiscard]] const uvec& get_node_encoding() const; [[nodiscard]] const uvec& get_dof_encoding() const; diff --git a/Domain/Domain.cpp b/Domain/Domain.cpp index 5427fae95..6852f0602 100644 --- a/Domain/Domain.cpp +++ b/Domain/Domain.cpp @@ -41,7 +41,8 @@ Domain::Domain(const unsigned T) : DomainBase(T) - , factory(make_shared>()) {} + , factory(make_shared>()) + , attribute(10, false) {} Domain::~Domain() { for(const auto& I : thread_pond) I->get(); } @@ -712,6 +713,10 @@ bool Domain::is_updated() const { return updated.load(); } bool Domain::is_sparse() const { return factory->is_sparse(); } +void Domain::set_attribute(const ModalAttribute A) { attribute[static_cast(A)] = true; } + +bool Domain::get_attribute(const ModalAttribute A) { return attribute[static_cast(A)]; } + void Domain::set_color_model(const ColorMethod B) { color_model = B; color_map.clear(); @@ -1177,10 +1182,7 @@ void Domain::summary() const { suanpan_info("\t%llu loads, %llu constraints and %llu recorders.\n", get_load(), get_constraint(), get_recorder()); } -void Domain::erase_machine_error() const { - auto& t_ninja = get_ninja(factory); - suanpan::for_all(restrained_dofs, [&](const uword I) { t_ninja(I) = 0.; }); -} +void Domain::erase_machine_error(vec& ninja) const { suanpan::for_all(restrained_dofs, [&](const uword I) { ninja(I) = 0.; }); } void Domain::update_load() {} diff --git a/Domain/Domain.h b/Domain/Domain.h index f4e824b89..54ebb9f44 100644 --- a/Domain/Domain.h +++ b/Domain/Domain.h @@ -75,6 +75,8 @@ class Domain final : public DomainBase, public std::enable_shared_from_this> color_map; + std::vector attribute; + public: explicit Domain(unsigned = 0); Domain(const Domain&) = delete; // copy forbidden @@ -303,6 +305,9 @@ class Domain final : public DomainBase, public std::enable_shared_from_this>& get_color_map() const override; std::pair, suanpan::graph> get_element_connectivity(bool) override; @@ -358,7 +363,7 @@ class Domain final : public DomainBase, public std::enable_shared_from_this>& get_color_map() const = 0; [[nodiscard]] virtual std::pair, suanpan::graph> get_element_connectivity(bool) = 0; @@ -372,7 +379,7 @@ class DomainBase : public Tag { virtual void assemble_mass_container() const = 0; virtual void assemble_stiffness_container() const = 0; - virtual void erase_machine_error() const = 0; + virtual void erase_machine_error(vec&) const = 0; virtual void update_load() = 0; virtual void update_constraint() = 0; diff --git a/Domain/DomainState.cpp b/Domain/DomainState.cpp index cd24d5b2f..127a36f8e 100644 --- a/Domain/DomainState.cpp +++ b/Domain/DomainState.cpp @@ -15,12 +15,12 @@ * along with this program. If not, see . ******************************************************************************/ +#include "Domain.h" #include #include #include #include -#include "Domain.h" -#include "FactoryHelper.hpp" +#include void Domain::update_current_resistance() const { get_trial_resistance(factory).zeros(); @@ -74,6 +74,9 @@ void Domain::assemble_resistance() const { }); suanpan::for_all(node_pond.get(), [&](const shared_ptr& t_node) { t_node->update_trial_resistance(trial_resistance(t_node->get_reordered_dof())); }); + + // update to sync incre_resistance + factory->update_trial_resistance(trial_resistance); } void Domain::assemble_damping_force() const { @@ -88,6 +91,9 @@ void Domain::assemble_damping_force() const { }); suanpan::for_all(node_pond.get(), [&](const shared_ptr& t_node) { t_node->update_trial_damping_force(trial_damping_force(t_node->get_reordered_dof())); }); + + // update to sync incre_damping_force + factory->update_trial_damping_force(trial_damping_force); } void Domain::assemble_inertial_force() const { @@ -102,6 +108,9 @@ void Domain::assemble_inertial_force() const { }); suanpan::for_all(node_pond.get(), [&](const shared_ptr& t_node) { t_node->update_trial_inertial_force(trial_inertial_force(t_node->get_reordered_dof())); }); + + // update to sync incre_inertial_force + factory->update_trial_inertial_force(trial_inertial_force); } void Domain::assemble_initial_mass() const { @@ -311,9 +320,8 @@ int Domain::update_trial_status() const { if(AnalysisType::DYNAMICS == factory->get_analysis_type()) suanpan::for_all(node_pond.get(), [&](const shared_ptr& t_node) { t_node->update_trial_status(trial_displacement, trial_velocity, trial_acceleration); }); else suanpan::for_all(node_pond.get(), [&](const shared_ptr& t_node) { t_node->update_trial_status(trial_displacement); }); - auto code = 0; + std::atomic_int code = 0; suanpan::for_all(element_pond.get(), [&](const shared_ptr& t_element) { code += t_element->update_status(); }); - return code; } @@ -325,22 +333,19 @@ int Domain::update_incre_status() const { if(AnalysisType::DYNAMICS == factory->get_analysis_type()) suanpan::for_all(node_pond.get(), [&](const shared_ptr& t_node) { t_node->update_incre_status(incre_displacement, incre_velocity, incre_acceleration); }); else suanpan::for_all(node_pond.get(), [&](const shared_ptr& t_node) { t_node->update_incre_status(incre_displacement); }); - auto code = 0; + std::atomic_int code = 0; suanpan::for_all(element_pond.get(), [&](const shared_ptr& t_element) { code += t_element->update_status(); }); - return code; } int Domain::update_current_status() const { - const auto& analysis_type = factory->get_analysis_type(); + const auto analysis_type = factory->get_analysis_type(); - if(vec c_g_dsp(factory->get_size(), fill::zeros); analysis_type == AnalysisType::STATICS || analysis_type == AnalysisType::BUCKLE) { - for(const auto& I : node_pond.get()) c_g_dsp(I->get_reordered_dof()) = I->get_current_displacement(); - factory->update_current_displacement(c_g_dsp); + if(analysis_type != AnalysisType::STATICS && analysis_type != AnalysisType::DYNAMICS && analysis_type != AnalysisType::BUCKLE) return SUANPAN_SUCCESS; - update_current_resistance(); - } - else if(analysis_type == AnalysisType::DYNAMICS) { + // collect initial nodal quantities into global storage + if(analysis_type == AnalysisType::DYNAMICS) { + vec c_g_dsp(factory->get_size(), fill::zeros); vec c_g_vel(factory->get_size(), fill::zeros); vec c_g_acc(factory->get_size(), fill::zeros); @@ -353,8 +358,33 @@ int Domain::update_current_status() const { factory->update_current_displacement(c_g_dsp); factory->update_current_velocity(c_g_vel); factory->update_current_acceleration(c_g_acc); + } + else { + vec c_g_dsp(factory->get_size(), fill::zeros); + + for(const auto& I : node_pond.get()) { + auto& t_dof = I->get_reordered_dof(); + c_g_dsp(t_dof) = I->get_current_displacement(); + } + factory->update_current_displacement(c_g_dsp); + } + + std::atomic_int code = 0; + suanpan::for_all(element_pond.get(), [&](const shared_ptr& t_element) { code += t_element->update_status(); }); + + if(SUANPAN_SUCCESS != code) { + suanpan_error("initial conditions cause significant non-linearity, use an analysis step instead.\n"); + return SUANPAN_FAIL; + } + + // commit element status + suanpan::for_all(element_pond.get(), [](const shared_ptr& t_element) { + t_element->Element::commit_status(); + t_element->commit_status(); + }); - update_current_resistance(); + update_current_resistance(); + if(analysis_type == AnalysisType::DYNAMICS) { update_current_damping_force(); update_current_inertial_force(); } diff --git a/Domain/Factory.hpp b/Domain/Factory.hpp index b8686fee4..22ce096af 100644 --- a/Domain/Factory.hpp +++ b/Domain/Factory.hpp @@ -167,6 +167,8 @@ template class Factory final { template friend unique_ptr> get_basic_container(const Factory*); template friend unique_ptr> get_matrix_container(const Factory*); + void assemble_matrix_helper(shared_ptr>&, const Mat&, const uvec&); + public: const bool initialized = false; @@ -206,8 +208,8 @@ template class Factory final { void set_reference_dof(const suanpan::set&); [[nodiscard]] const suanpan::set& get_reference_dof() const; - void set_error(const T&); - const T& get_error() const; + void set_error(T); + T get_error() const; /*************************INITIALIZER*************************/ @@ -240,7 +242,7 @@ template class Factory final { void set_reference_load(const SpMat&); - void set_trial_time(const T&); + void set_trial_time(T); void set_trial_load_factor(const Col&); void set_trial_load(const Col&); void set_trial_settlement(const Col&); @@ -252,7 +254,7 @@ template class Factory final { void set_trial_acceleration(const Col&); void set_trial_temperature(const Col&); - void set_incre_time(const T&); + void set_incre_time(T); void set_incre_load_factor(const Col&); void set_incre_load(const Col&); void set_incre_settlement(const Col&); @@ -264,7 +266,7 @@ template class Factory final { void set_incre_acceleration(const Col&); void set_incre_temperature(const Col&); - void set_current_time(const T&); + void set_current_time(T); void set_current_load_factor(const Col&); void set_current_load(const Col&); void set_current_settlement(const Col&); @@ -276,7 +278,7 @@ template class Factory final { void set_current_acceleration(const Col&); void set_current_temperature(const Col&); - void set_pre_time(const T&); + void set_pre_time(T); void set_pre_load_factor(const Col&); void set_pre_load(const Col&); void set_pre_settlement(const Col&); @@ -320,7 +322,7 @@ template class Factory final { T get_complementary_energy(); const Col& get_momentum(); - const T& get_trial_time() const; + T get_trial_time() const; const Col& get_trial_load_factor() const; const Col& get_trial_load() const; const Col& get_trial_settlement() const; @@ -332,7 +334,7 @@ template class Factory final { const Col& get_trial_acceleration() const; const Col& get_trial_temperature() const; - const T& get_incre_time() const; + T get_incre_time() const; const Col& get_incre_load_factor() const; const Col& get_incre_load() const; const Col& get_incre_settlement() const; @@ -344,7 +346,7 @@ template class Factory final { const Col& get_incre_acceleration() const; const Col& get_incre_temperature() const; - const T& get_current_time() const; + T get_current_time() const; const Col& get_current_load_factor() const; const Col& get_current_load() const; const Col& get_current_settlement() const; @@ -356,7 +358,7 @@ template class Factory final { const Col& get_current_acceleration() const; const Col& get_current_temperature() const; - const T& get_pre_time() const; + T get_pre_time() const; const Col& get_pre_load_factor() const; const Col& get_pre_load() const; const Col& get_pre_settlement() const; @@ -393,7 +395,7 @@ template class Factory final { /*************************UPDATER*************************/ - void update_trial_time(const T&); + void update_trial_time(T); void update_trial_load_factor(const Col&); void update_trial_load(const Col&); void update_trial_settlement(const Col&); @@ -405,7 +407,7 @@ template class Factory final { void update_trial_acceleration(const Col&); void update_trial_temperature(const Col&); - void update_incre_time(const T&); + void update_incre_time(T); void update_incre_load_factor(const Col&); void update_incre_load(const Col&); void update_incre_settlement(const Col&); @@ -417,7 +419,7 @@ template class Factory final { void update_incre_acceleration(const Col&); void update_incre_temperature(const Col&); - void update_current_time(const T&); + void update_current_time(T); void update_current_load_factor(const Col&); void update_current_load(const Col&); void update_current_settlement(const Col&); @@ -429,7 +431,7 @@ template class Factory final { void update_current_acceleration(const Col&); void update_current_temperature(const Col&); - void update_trial_time_by(const T&); + void update_trial_time_by(T); void update_trial_load_factor_by(const Col&); void update_trial_load_by(const Col&); void update_trial_settlement_by(const Col&); @@ -441,7 +443,7 @@ template class Factory final { void update_trial_acceleration_by(const Col&); void update_trial_temperature_by(const Col&); - void update_incre_time_by(const T&); + void update_incre_time_by(T); void update_incre_load_factor_by(const Col&); void update_incre_load_by(const Col&); void update_incre_settlement_by(const Col&); @@ -453,7 +455,7 @@ template class Factory final { void update_incre_acceleration_by(const Col&); void update_incre_temperature_by(const Col&); - void update_current_time_by(const T&); + void update_current_time_by(T); void update_current_load_factor_by(const Col&); void update_current_load_by(const Col&); void update_current_settlement_by(const Col&); @@ -482,7 +484,7 @@ template class Factory final { template friend SpCol& get_trial_constraint_resistance(const shared_ptr>&); template friend SpCol& get_current_constraint_resistance(const shared_ptr>&); - template friend T& get_trial_time(const shared_ptr>&); + template friend T1& get_trial_time(const shared_ptr>&); template friend Col& get_trial_load_factor(const shared_ptr>&); template friend Col& get_trial_load(const shared_ptr>&); template friend Col& get_trial_settlement(const shared_ptr>&); @@ -494,7 +496,7 @@ template class Factory final { template friend Col& get_trial_acceleration(const shared_ptr>&); template friend Col& get_trial_temperature(const shared_ptr>&); - template friend T& get_incre_time(const shared_ptr>&); + template friend T1& get_incre_time(const shared_ptr>&); template friend Col& get_incre_load_factor(const shared_ptr>&); template friend Col& get_incre_load(const shared_ptr>&); template friend Col& get_incre_settlement(const shared_ptr>&); @@ -506,7 +508,7 @@ template class Factory final { template friend Col& get_incre_acceleration(const shared_ptr>&); template friend Col& get_incre_temperature(const shared_ptr>&); - template friend T& get_current_time(const shared_ptr>&); + template friend T1& get_current_time(const shared_ptr>&); template friend Col& get_current_load_factor(const shared_ptr>&); template friend Col& get_current_load(const shared_ptr>&); template friend Col& get_current_settlement(const shared_ptr>&); @@ -518,7 +520,7 @@ template class Factory final { template friend Col& get_current_acceleration(const shared_ptr>&); template friend Col& get_current_temperature(const shared_ptr>&); - template friend T& get_pre_time(const shared_ptr>&); + template friend T1& get_pre_time(const shared_ptr>&); template friend Col& get_pre_load_factor(const shared_ptr>&); template friend Col& get_pre_load(const shared_ptr>&); template friend Col& get_pre_settlement(const shared_ptr>&); @@ -702,9 +704,9 @@ template void Factory::set_reference_dof(const suanpan::set& D template const suanpan::set& Factory::get_reference_dof() const { return reference_dof; } -template void Factory::set_error(const T& E) { error = E; } +template void Factory::set_error(const T E) { error = E; } -template const T& Factory::get_error() const { return error; } +template T Factory::get_error() const { return error; } template int Factory::initialize() { reference_dof.clear(); // clear reference dof vector in every step @@ -857,7 +859,7 @@ template void Factory::set_mpc(const unsigned S) { template void Factory::set_reference_load(const SpMat& L) { reference_load = L; } -template void Factory::set_trial_time(const T& M) { trial_time = M; } +template void Factory::set_trial_time(const T M) { trial_time = M; } template void Factory::set_trial_load_factor(const Col& L) { trial_load_factor = L; } @@ -879,7 +881,7 @@ template void Factory::set_trial_acceleration(const Col& A) { tria template void Factory::set_trial_temperature(const Col& M) { trial_temperature = M; } -template void Factory::set_incre_time(const T& M) { incre_time = M; } +template void Factory::set_incre_time(const T M) { incre_time = M; } template void Factory::set_incre_load_factor(const Col& L) { incre_load_factor = L; } @@ -901,7 +903,7 @@ template void Factory::set_incre_acceleration(const Col& A) { incr template void Factory::set_incre_temperature(const Col& M) { incre_temperature = M; } -template void Factory::set_current_time(const T& M) { current_time = M; } +template void Factory::set_current_time(const T M) { current_time = M; } template void Factory::set_current_load_factor(const Col& L) { current_load_factor = L; } @@ -923,7 +925,7 @@ template void Factory::set_current_acceleration(const Col& A) { cu template void Factory::set_current_temperature(const Col& M) { current_temperature = M; } -template void Factory::set_pre_time(const T& M) { pre_time = M; } +template void Factory::set_pre_time(const T M) { pre_time = M; } template void Factory::set_pre_load_factor(const Col& L) { pre_load_factor = L; } @@ -989,7 +991,7 @@ template T Factory::get_complementary_energy() { return complementary template const Col& Factory::get_momentum() { return momentum; } -template const T& Factory::get_trial_time() const { return trial_time; } +template T Factory::get_trial_time() const { return trial_time; } template const Col& Factory::get_trial_load_factor() const { return trial_load_factor; } @@ -1011,7 +1013,7 @@ template const Col& Factory::get_trial_acceleration() const { retu template const Col& Factory::get_trial_temperature() const { return trial_temperature; } -template const T& Factory::get_incre_time() const { return incre_time; } +template T Factory::get_incre_time() const { return incre_time; } template const Col& Factory::get_incre_load_factor() const { return incre_load_factor; } @@ -1033,7 +1035,7 @@ template const Col& Factory::get_incre_acceleration() const { retu template const Col& Factory::get_incre_temperature() const { return incre_temperature; } -template const T& Factory::get_current_time() const { return current_time; } +template T Factory::get_current_time() const { return current_time; } template const Col& Factory::get_current_load_factor() const { return current_load_factor; } @@ -1055,7 +1057,7 @@ template const Col& Factory::get_current_acceleration() const { re template const Col& Factory::get_current_temperature() const { return current_temperature; } -template const T& Factory::get_pre_time() const { return pre_time; } +template T Factory::get_pre_time() const { return pre_time; } template const Col& Factory::get_pre_load_factor() const { return pre_load_factor; } @@ -1111,7 +1113,7 @@ template const Col& Factory::get_eigenvalue() const { return eigen template const Mat& Factory::get_eigenvector() const { return eigenvector; } -template void Factory::update_trial_time(const T& M) { +template void Factory::update_trial_time(const T M) { trial_time = M; incre_time = trial_time - current_time; } @@ -1166,7 +1168,7 @@ template void Factory::update_trial_temperature(const Col& M) { incre_temperature = trial_temperature - current_temperature; } -template void Factory::update_incre_time(const T& M) { +template void Factory::update_incre_time(const T M) { incre_time = M; trial_time = current_time + incre_time; } @@ -1221,7 +1223,7 @@ template void Factory::update_incre_temperature(const Col& M) { trial_temperature = current_temperature + incre_temperature; } -template void Factory::update_current_time(const T& M) { +template void Factory::update_current_time(const T M) { trial_time = current_time = M; incre_time = 0.; } @@ -1276,7 +1278,7 @@ template void Factory::update_current_temperature(const Col& M) { incre_temperature.zeros(); } -template void Factory::update_trial_time_by(const T& M) { +template void Factory::update_trial_time_by(const T M) { trial_time += M; incre_time = trial_time - current_time; } @@ -1331,7 +1333,7 @@ template void Factory::update_trial_temperature_by(const Col& M) { incre_temperature = trial_temperature - current_temperature; } -template void Factory::update_incre_time_by(const T& M) { +template void Factory::update_incre_time_by(const T M) { incre_time += M; trial_time = current_time + incre_time; } @@ -1386,7 +1388,7 @@ template void Factory::update_incre_temperature_by(const Col& M) { trial_temperature = current_temperature + incre_temperature; } -template void Factory::update_current_time_by(const T& M) { +template void Factory::update_current_time_by(const T M) { trial_time = current_time += M; incre_time = 0.; } @@ -1464,6 +1466,8 @@ template void Factory::clear_energy() { } template void Factory::commit_status() { + ninja.zeros(); + commit_energy(); commit_time(); @@ -1814,25 +1818,29 @@ template void Factory::assemble_inertial_force(const Mat& ER, cons for(unsigned I = 0; I < EI.n_elem; ++I) trial_inertial_force(EI(I)) += ER(I); } -template void Factory::assemble_mass(const Mat& EM, const uvec& EI) { +/** + * \brief Assemble given elemental matrix into global matrix + * \param GM global matrix + * \param EM elemental matrix + * \param EI elemental matrix indices + */ +template void Factory::assemble_matrix_helper(shared_ptr>& GM, const Mat& EM, const uvec& EI) { if(EM.is_empty()) return; - for(unsigned I = 0; I < EI.n_elem; ++I) for(unsigned J = 0; J < EI.n_elem; ++J) global_mass->at(EI(J), EI(I)) += EM(J, I); -} -template void Factory::assemble_damping(const Mat& EC, const uvec& EI) { - if(EC.is_empty()) return; - for(unsigned I = 0; I < EI.n_elem; ++I) for(unsigned J = 0; J < EI.n_elem; ++J) global_damping->at(EI(J), EI(I)) += EC(J, I); + if(StorageScheme::BANDSYMM == storage_type || StorageScheme::SYMMPACK == storage_type) { + const uvec NEI = sort_index(EI); + for(unsigned I = 0; I < NEI.n_elem; ++I) for(unsigned J = 0; J <= I; ++J) GM->unsafe_at(EI(NEI(I)), EI(NEI(J))) += EM(NEI(I), NEI(J)); + } + else for(unsigned I = 0; I < EI.n_elem; ++I) for(unsigned J = 0; J < EI.n_elem; ++J) GM->unsafe_at(EI(J), EI(I)) += EM(J, I); } -template void Factory::assemble_stiffness(const Mat& EK, const uvec& EI) { - if(EK.is_empty()) return; - for(unsigned I = 0; I < EI.n_elem; ++I) for(unsigned J = 0; J < EI.n_elem; ++J) global_stiffness->at(EI(J), EI(I)) += EK(J, I); -} +template void Factory::assemble_mass(const Mat& EM, const uvec& EI) { this->assemble_matrix_helper(global_mass, EM, EI); } -template void Factory::assemble_geometry(const Mat& EG, const uvec& EI) { - if(EG.is_empty() || !nlgeom) return; - for(unsigned I = 0; I < EI.n_elem; ++I) for(unsigned J = 0; J < EI.n_elem; ++J) global_geometry->at(EI(J), EI(I)) += EG(J, I); -} +template void Factory::assemble_damping(const Mat& EC, const uvec& EI) { this->assemble_matrix_helper(global_damping, EC, EI); } + +template void Factory::assemble_stiffness(const Mat& EK, const uvec& EI) { this->assemble_matrix_helper(global_stiffness, EK, EI); } + +template void Factory::assemble_geometry(const Mat& EG, const uvec& EI) { this->assemble_matrix_helper(global_geometry, EG, EI); } template void Factory::assemble_stiffness(const SpMat& EK, const uvec& EI) { if(EK.is_empty()) return; diff --git a/Domain/MetaMat/BandMat.hpp b/Domain/MetaMat/BandMat.hpp index 7a14fffe8..5a84959df 100644 --- a/Domain/MetaMat/BandMat.hpp +++ b/Domain/MetaMat/BandMat.hpp @@ -54,6 +54,7 @@ template class BandMat final : public DenseMat { void nullify(uword) override; const T& operator()(uword, uword) const override; + T& unsafe_at(uword, uword) override; T& at(uword, uword) override; Mat operator*(const Mat&) const override; @@ -90,6 +91,11 @@ template const T& BandMat::operator()(const uword in_row, const uword return this->memory[in_row + s_band + in_col * (m_rows - 1)]; } +template T& BandMat::unsafe_at(const uword in_row, const uword in_col) { + this->factored = false; + return access::rw(this->memory[in_row + s_band + in_col * (m_rows - 1)]); +} + template T& BandMat::at(const uword in_row, const uword in_col) { if(in_row > in_col + l_band || in_row + u_band < in_col) return bin = 0.; this->factored = false; diff --git a/Domain/MetaMat/BandSymmMat.hpp b/Domain/MetaMat/BandSymmMat.hpp index 4aa2b65d3..159a77c46 100644 --- a/Domain/MetaMat/BandSymmMat.hpp +++ b/Domain/MetaMat/BandSymmMat.hpp @@ -52,6 +52,7 @@ template class BandSymmMat final : public DenseMat { void nullify(uword) override; const T& operator()(uword, uword) const override; + T& unsafe_at(uword, uword) override; T& at(uword, uword) override; Mat operator*(const Mat&) const override; @@ -76,7 +77,8 @@ template void BandSymmMat::unify(const uword K) { template void BandSymmMat::nullify(const uword K) { suanpan_for(std::max(band, K) - band, K, [&](const uword I) { access::rw(this->memory[K - I + I * m_rows]) = 0.; }); - suanpan_for(K, std::min(this->n_rows, K + band + 1), [&](const uword I) { access::rw(this->memory[I - K + K * m_rows]) = 0.; }); + const auto t_factor = K * m_rows - K; + suanpan_for(K, std::min(this->n_rows, K + band + 1), [&](const uword I) { access::rw(this->memory[I + t_factor]) = 0.; }); this->factored = false; } @@ -86,8 +88,13 @@ template const T& BandSymmMat::operator()(const uword in_row, const u return this->memory[in_row > in_col ? in_row - in_col + in_col * m_rows : in_col - in_row + in_row * m_rows]; } +template T& BandSymmMat::unsafe_at(const uword in_row, const uword in_col) { + this->factored = false; + return access::rw(this->memory[in_row - in_col + in_col * m_rows]); +} + template T& BandSymmMat::at(const uword in_row, const uword in_col) { - if(in_row > band + in_col || in_row < in_col) return bin = 0.; + if(in_row > band + in_col || in_row < in_col) [[unlikely]] return bin = 0.; this->factored = false; return access::rw(this->memory[in_row - in_col + in_col * m_rows]); } diff --git a/Domain/MetaMat/DenseMat.hpp b/Domain/MetaMat/DenseMat.hpp index 8c65377b6..1e0f50c79 100644 --- a/Domain/MetaMat/DenseMat.hpp +++ b/Domain/MetaMat/DenseMat.hpp @@ -136,7 +136,12 @@ template void DenseMat::zeros() { this->factored = false; } -template T DenseMat::max() const { return op_max::direct_max(memptr(), this->n_elem); } +template T DenseMat::max() const { + T max_value = T(1); + const auto t_size = std::min(this->n_rows, this->n_cols); + for(uword I = 0; I < t_size; ++I) if(const auto t_val = this->operator()(I, I); t_val > max_value) max_value = t_val; + return max_value; +} template Col DenseMat::diag() const { Col diag_vec(std::min(this->n_rows, this->n_cols), fill::none); diff --git a/Domain/MetaMat/MetaMat.hpp b/Domain/MetaMat/MetaMat.hpp index b4afac243..d1392f301 100644 --- a/Domain/MetaMat/MetaMat.hpp +++ b/Domain/MetaMat/MetaMat.hpp @@ -72,7 +72,20 @@ template class MetaMat { [[nodiscard]] virtual T max() const = 0; [[nodiscard]] virtual Col diag() const = 0; + /** + * \brief Access element (read-only), returns zero if out-of-bound + * \return value + */ virtual const T& operator()(uword, uword) const = 0; + /** + * \brief Access element without bound check + * \return value + */ + virtual T& unsafe_at(uword, uword); + /** + * \brief Access element with bound check + * \return value + */ virtual T& at(uword, uword) = 0; [[nodiscard]] virtual const T* memptr() const = 0; @@ -153,6 +166,8 @@ template SolverSetting& MetaMat::get_solver_setting() { return set template void MetaMat::set_factored(const bool F) { factored = F; } +template T& MetaMat::unsafe_at(const uword I, const uword J) { return this->at(I, J); } + template int MetaMat::direct_solve(Mat& X, const SpMat& B) { return this->direct_solve(X, Mat(B)); } template int MetaMat::direct_solve(Mat& X, Mat&& B) { return this->direct_solve(X, B); } diff --git a/Domain/MetaMat/SymmPackMat.hpp b/Domain/MetaMat/SymmPackMat.hpp index 9dd43905f..2f9cfa1a8 100644 --- a/Domain/MetaMat/SymmPackMat.hpp +++ b/Domain/MetaMat/SymmPackMat.hpp @@ -19,8 +19,8 @@ * @brief A SymmPackMat class that holds matrices. * * @author tlc - * @date 06/09/2017 - * @version 0.1.0 + * @date 13/11/2022 + * @version 0.2.0 * @file SymmPackMat.hpp * @addtogroup MetaMat * @{ @@ -33,10 +33,12 @@ #include "DenseMat.hpp" template class SymmPackMat final : public DenseMat { - static constexpr char UPLO = 'U'; + static constexpr char UPLO = 'L'; static T bin; + const uword length; // 2n-1 + int solve_trs(Mat&, Mat&&); int solve_trs(Mat&, const Mat&); @@ -49,6 +51,7 @@ template class SymmPackMat final : public DenseMat { void nullify(uword) override; const T& operator()(uword, uword) const override; + T& unsafe_at(uword, uword) override; T& at(uword, uword) override; Mat operator*(const Mat&) const override; @@ -60,49 +63,52 @@ template class SymmPackMat final : public DenseMat { template T SymmPackMat::bin = 0.; template SymmPackMat::SymmPackMat(const uword in_size) - : DenseMat(in_size, in_size, (in_size + 1) * in_size / 2) {} + : DenseMat(in_size, in_size, (in_size + 1) * in_size / 2) + , length(2 * in_size - 1) {} template unique_ptr> SymmPackMat::make_copy() { return std::make_unique>(*this); } template void SymmPackMat::unify(const uword K) { nullify(K); - access::rw(this->memory[(K * K + 3 * K) / 2]) = 1.; + access::rw(this->memory[(length - K + 2) * K / 2]) = 1.; } template void SymmPackMat::nullify(const uword K) { - suanpan_for(0llu, K, [&](const uword I) { access::rw(this->memory[(K * K + K) / 2 + I]) = 0.; }); - suanpan_for(K, this->n_rows, [&](const uword I) { access::rw(this->memory[(I * I + I) / 2 + K]) = 0.; }); + suanpan_for(0llu, K, [&](const uword I) { access::rw(this->memory[K + (length - I) * I / 2]) = 0.; }); + const auto t_factor = (length - K) * K / 2; + suanpan_for(K, this->n_rows, [&](const uword I) { access::rw(this->memory[I + t_factor]) = 0.; }); this->factored = false; } -template const T& SymmPackMat::operator()(const uword in_row, const uword in_col) const { return this->memory[in_col > in_row ? (in_col * in_col + in_col) / 2 + in_row : (in_row * in_row + in_row) / 2 + in_col]; } +template const T& SymmPackMat::operator()(const uword in_row, const uword in_col) const { return this->memory[in_row >= in_col ? in_row + (length - in_col) * in_col / 2 : in_col + (length - in_row) * in_row / 2]; } -template T& SymmPackMat::at(const uword in_row, const uword in_col) { - if(in_col < in_row) return bin; +template T& SymmPackMat::unsafe_at(const uword in_row, const uword in_col) { this->factored = false; - return access::rw(this->memory[(in_col * in_col + in_col) / 2 + in_row]); + return access::rw(this->memory[in_row + (length - in_col) * in_col / 2]); } -template Mat spmm(const SymmPackMat& A, const Mat& B); +template T& SymmPackMat::at(const uword in_row, const uword in_col) { + if(in_row < in_col) [[unlikely]] return bin; + this->factored = false; + return access::rw(this->memory[in_row + (length - in_col) * in_col / 2]); +} template Mat SymmPackMat::operator*(const Mat& X) const { - if(!X.is_colvec()) return spmm<'R', 'N'>(*this, X); - - auto Y = X; + auto Y = Mat(arma::size(X), fill::none); const auto N = static_cast(this->n_rows); - constexpr auto INC = 1; + const auto INC = 1; T ALPHA = 1.; T BETA = 0.; if(std::is_same_v) { using E = float; - arma_fortran(arma_sspmv)(&UPLO, &N, (E*)&ALPHA, (E*)this->memptr(), (E*)X.memptr(), &INC, (E*)&BETA, (E*)Y.memptr(), &INC); + suanpan_for(0llu, X.n_cols, [&](const uword I) { arma_fortran(arma_sspmv)(&UPLO, &N, (E*)&ALPHA, (E*)this->memptr(), (E*)X.colptr(I), &INC, (E*)&BETA, (E*)Y.colptr(I), &INC); }); } else if(std::is_same_v) { using E = double; - arma_fortran(arma_dspmv)(&UPLO, &N, (E*)&ALPHA, (E*)this->memptr(), (E*)X.memptr(), &INC, (E*)&BETA, (E*)Y.memptr(), &INC); + suanpan_for(0llu, X.n_cols, [&](const uword I) { arma_fortran(arma_dspmv)(&UPLO, &N, (E*)&ALPHA, (E*)this->memptr(), (E*)X.colptr(I), &INC, (E*)&BETA, (E*)Y.colptr(I), &INC); }); } return Y; @@ -175,7 +181,7 @@ template int SymmPackMat::solve_trs(Mat& X, const Mat& B) { X += incre; - suanpan_debug("mixed precision algorithm multiplier: %.5E.\n", multiplier = norm(full_residual -= this->operator*(incre))); + suanpan_debug("mixed precision algorithm multiplier: %.5E.\n", multiplier = arma::norm(full_residual -= this->operator*(incre))); } } @@ -249,7 +255,7 @@ template int SymmPackMat::solve_trs(Mat& X, Mat&& B) { X += incre; - suanpan_debug("mixed precision algorithm multiplier: %.5E.\n", multiplier = norm(B -= this->operator*(incre))); + suanpan_debug("mixed precision algorithm multiplier: %.5E.\n", multiplier = arma::norm(B -= this->operator*(incre))); } } diff --git a/Domain/MetaMat/csc_form.hpp b/Domain/MetaMat/csc_form.hpp index 3d1ef8fc1..24bc750a7 100644 --- a/Domain/MetaMat/csc_form.hpp +++ b/Domain/MetaMat/csc_form.hpp @@ -78,7 +78,7 @@ template class csc_form final { [[nodiscard]] data_t max() const { if(0 == n_elem) return data_t(0); - return *std::max_element(val_idx.get(), val_idx.get() + n_elem); + return *suanpan_max_element(val_idx.get(), val_idx.get() + n_elem); } void print() const; diff --git a/Domain/MetaMat/csr_form.hpp b/Domain/MetaMat/csr_form.hpp index cf0e96d29..5e2a7c8c7 100644 --- a/Domain/MetaMat/csr_form.hpp +++ b/Domain/MetaMat/csr_form.hpp @@ -72,7 +72,7 @@ template class csr_form final { [[nodiscard]] data_t max() const { if(0 == n_elem) return data_t(0); - return *std::max_element(val_idx.get(), val_idx.get() + n_elem); + return *suanpan_max_element(val_idx.get(), val_idx.get() + n_elem); } void print() const; diff --git a/Domain/MetaMat/operator_times.hpp b/Domain/MetaMat/operator_times.hpp index e3d56cba9..90fe03ef2 100644 --- a/Domain/MetaMat/operator_times.hpp +++ b/Domain/MetaMat/operator_times.hpp @@ -42,6 +42,11 @@ template const shared_ptr>& operator*=(const shared_ptr const unique_ptr>& operator*=(const unique_ptr>& M, const T value) { + M->operator*=(value); + return M; +} + template const shared_ptr>& operator+=(const shared_ptr>& M, const shared_ptr>& A) { M->operator+=(A); return M; @@ -152,66 +157,6 @@ template Mat operator*(const Mat& A, const FullMat& B) { return C; } -template Mat spmm(const SymmPackMat& A, const Mat& B) { - Mat C; - - const auto SIDE = S; - const auto TRAN = T; - constexpr auto UPLO = 'U'; - - auto M = static_cast(A.n_rows); - - auto PT = 0; - if constexpr(SIDE == 'L') PT += 1; - if constexpr(TRAN == 'T') PT += 10; - - int N, LDC; - - switch(PT) { - case 0: // A*B - N = static_cast(B.n_cols); - C.set_size(M, N); - LDC = M; - break; - case 1: // B*A - N = static_cast(B.n_rows); - C.set_size(N, M); - LDC = N; - break; - case 10: // A*B**T - N = static_cast(B.n_rows); - C.set_size(M, N); - LDC = M; - break; - case 11: // B**T*A - N = static_cast(B.n_cols); - C.set_size(N, M); - LDC = N; - break; - default: - break; - } - - T1 ALPHA = 1.; - const auto LDB = static_cast(B.n_rows); - T1 BETA = 0.; - - if(std::is_same_v) { - using E = float; - arma_fortran(arma_sspmm)(&SIDE, &UPLO, &TRAN, &M, &N, (E*)A.memptr(), (E*)&ALPHA, (E*)B.memptr(), &LDB, (E*)&BETA, (E*)C.memptr(), &LDC); - } - else if(std::is_same_v) { - using E = double; - arma_fortran(arma_dspmm)(&SIDE, &UPLO, &TRAN, &M, &N, (E*)A.memptr(), (E*)&ALPHA, (E*)B.memptr(), &LDB, (E*)&BETA, (E*)C.memptr(), &LDC); - } - - return C; -} - -template Mat operator*(const Mat& A, const SymmPackMat& B) { return spmm<'L', 'N'>(B, A); } - -template Mat operator*(const Op, op_htrans>& A, const SymmPackMat& B) { return spmm<'L', 'T'>(B, A.m); } - template triplet_form operator*(const T value, const triplet_form& M) { auto N = M; N *= value; diff --git a/Domain/MetaMat/triplet_form.hpp b/Domain/MetaMat/triplet_form.hpp index 6f411d6a3..318fe489c 100644 --- a/Domain/MetaMat/triplet_form.hpp +++ b/Domain/MetaMat/triplet_form.hpp @@ -170,7 +170,7 @@ template class triplet_form final { [[nodiscard]] data_t max() const { if(is_empty()) return data_t(0); - return *std::max_element(val_idx.get(), val_idx.get() + n_elem); + return *suanpan_max_element(val_idx.get(), val_idx.get() + n_elem); } void zeros() { diff --git a/Element/Beam/EB21.cpp b/Element/Beam/EB21.cpp index c4213f704..d6cbaf3be 100644 --- a/Element/Beam/EB21.cpp +++ b/Element/Beam/EB21.cpp @@ -77,12 +77,10 @@ int EB21::reset_status() { } vector EB21::record(const OutputType P) { - vector output; + if(P == OutputType::BEAME) return {b_trans->to_local_vec(get_current_displacement())}; + if(P == OutputType::BEAMS) return {vec{local_stiff * b_trans->to_local_vec(get_current_displacement())}}; - if(P == OutputType::E) output.emplace_back(b_trans->to_local_vec(get_current_displacement())); - else if(P == OutputType::S) output.emplace_back(local_stiff * b_trans->to_local_vec(get_current_displacement())); - - return output; + return {}; } void EB21::print() { suanpan_info("An elastic B21 element%s", nlgeom ? " with corotational formulation.\n" : ".\n"); } diff --git a/Element/Cube/C3D20.cpp b/Element/Cube/C3D20.cpp index cc9b1676d..98f18a9a9 100644 --- a/Element/Cube/C3D20.cpp +++ b/Element/Cube/C3D20.cpp @@ -179,9 +179,7 @@ mat C3D20::compute_shape_function(const mat& coordinate, const unsigned order) c vector C3D20::record(const OutputType T) { vector data; - for(const auto& I : int_pt) for(const auto& J : I.c_material->record(T)) data.emplace_back(J); - return data; } diff --git a/Element/Cube/C3D8.cpp b/Element/Cube/C3D8.cpp index 330ec1ee2..d943014f9 100644 --- a/Element/Cube/C3D8.cpp +++ b/Element/Cube/C3D8.cpp @@ -199,9 +199,7 @@ mat C3D8::compute_shape_function(const mat& coordinate, const unsigned order) co vector C3D8::record(const OutputType T) { vector data; - for(const auto& I : int_pt) for(const auto& J : I.c_material->record(T)) data.emplace_back(J); - return data; } diff --git a/Element/Cube/C3D8I.cpp b/Element/Cube/C3D8I.cpp index 204dc20db..57e42681b 100644 --- a/Element/Cube/C3D8I.cpp +++ b/Element/Cube/C3D8I.cpp @@ -142,9 +142,7 @@ mat C3D8I::compute_shape_function(const mat& coordinate, const unsigned order) c vector C3D8I::record(const OutputType T) { vector data; - if(OutputType::E == T) for(const auto& I : int_pt) data.emplace_back(I.c_material->get_trial_strain()); - else if(OutputType::S == T) for(const auto& I : int_pt) data.emplace_back(I.c_material->get_trial_stress()); - else for(const auto& I : int_pt) for(const auto& J : I.c_material->record(T)) data.emplace_back(J); + for(const auto& I : int_pt) for(const auto& J : I.c_material->record(T)) data.emplace_back(J); return data; } diff --git a/Element/Cube/CIN3D8.cpp b/Element/Cube/CIN3D8.cpp index 1e018449e..7a9d120e2 100644 --- a/Element/Cube/CIN3D8.cpp +++ b/Element/Cube/CIN3D8.cpp @@ -228,17 +228,7 @@ int CIN3D8::reset_status() { vector CIN3D8::record(const OutputType T) { vector data; - switch(T) { - case OutputType::E: - for(const auto& I : int_pt) data.emplace_back(I.c_material->get_trial_strain()); - break; - case OutputType::S: - for(const auto& I : int_pt) data.emplace_back(I.c_material->get_trial_stress()); - break; - default: - for(const auto& I : int_pt) for(const auto& J : I.c_material->record(T)) data.emplace_back(J); - break; - } + for(const auto& I : int_pt) for(const auto& J : I.c_material->record(T)) data.emplace_back(J); return data; } diff --git a/Element/Cube/DC3D8.cpp b/Element/Cube/DC3D8.cpp index a726a6f95..24df8d054 100644 --- a/Element/Cube/DC3D8.cpp +++ b/Element/Cube/DC3D8.cpp @@ -131,9 +131,7 @@ vector DC3D8::record(const OutputType T) { if(T == OutputType::DAMAGE) return {get_current_displacement()(d_dof)}; vector data; - for(const auto& I : int_pt) for(const auto& J : I.c_material->record(T)) data.emplace_back(J); - return data; } diff --git a/Element/Membrane/Axisymmetric/CAX8.cpp b/Element/Membrane/Axisymmetric/CAX8.cpp index b74e81303..885c7eaa6 100644 --- a/Element/Membrane/Axisymmetric/CAX8.cpp +++ b/Element/Membrane/Axisymmetric/CAX8.cpp @@ -138,9 +138,7 @@ int CAX8::reset_status() { vector CAX8::record(const OutputType T) { vector data; - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(T)) data.emplace_back(J); - return data; } diff --git a/Element/Membrane/CSM/CSMQ.cpp b/Element/Membrane/CSM/CSMQ.cpp index ff491b75a..4c69d7e64 100644 --- a/Element/Membrane/CSM/CSMQ.cpp +++ b/Element/Membrane/CSM/CSMQ.cpp @@ -193,10 +193,7 @@ mat CSMQ::compute_shape_function(const mat& coordinate, const unsigned order) co vector CSMQ::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/CSM/CSMQ4.cpp b/Element/Membrane/CSM/CSMQ4.cpp index 43ff3573f..ee213e64a 100644 --- a/Element/Membrane/CSM/CSMQ4.cpp +++ b/Element/Membrane/CSM/CSMQ4.cpp @@ -185,7 +185,6 @@ mat CSMQ4::compute_shape_function(const mat& coordinate, const unsigned order) c vector CSMQ4::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); if(P == OutputType::NMISES) { mat A(int_pt.size(), 4); diff --git a/Element/Membrane/CSM/CSMQ8.cpp b/Element/Membrane/CSM/CSMQ8.cpp index 07e3f38c3..614da8d95 100644 --- a/Element/Membrane/CSM/CSMQ8.cpp +++ b/Element/Membrane/CSM/CSMQ8.cpp @@ -185,10 +185,7 @@ mat CSMQ8::compute_shape_function(const mat& coordinate, const unsigned order) c vector CSMQ8::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/Drilling/GQ12.cpp b/Element/Membrane/Drilling/GQ12.cpp index b6c199b3f..26debc57b 100644 --- a/Element/Membrane/Drilling/GQ12.cpp +++ b/Element/Membrane/Drilling/GQ12.cpp @@ -167,10 +167,7 @@ mat GQ12::compute_shape_function(const mat& coordinate, const unsigned order) co vector GQ12::record(const OutputType T) { vector data; - data.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(T)) data.emplace_back(J); - return data; } diff --git a/Element/Membrane/Infinite/CINP4.cpp b/Element/Membrane/Infinite/CINP4.cpp index d4d43429a..944095bd4 100644 --- a/Element/Membrane/Infinite/CINP4.cpp +++ b/Element/Membrane/Infinite/CINP4.cpp @@ -341,10 +341,7 @@ int CINP4::reset_status() { vector CINP4::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/Mixed/PS.cpp b/Element/Membrane/Mixed/PS.cpp index 49830baa3..00fc3d01d 100644 --- a/Element/Membrane/Mixed/PS.cpp +++ b/Element/Membrane/Mixed/PS.cpp @@ -158,10 +158,7 @@ mat PS::compute_shape_function(const mat& coordinate, const unsigned order) cons vector PS::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/Mixed/QE2.cpp b/Element/Membrane/Mixed/QE2.cpp index 7b75678b9..729c541cd 100644 --- a/Element/Membrane/Mixed/QE2.cpp +++ b/Element/Membrane/Mixed/QE2.cpp @@ -206,7 +206,6 @@ mat QE2::compute_shape_function(const mat& coordinate, const unsigned order) con vector QE2::record(const OutputType T) { vector data; - data.reserve(int_pt.size()); if(T == OutputType::E) for(const auto& I : int_pt) data.emplace_back(I.A * current_alpha); else if(T == OutputType::S) for(const auto& I : int_pt) data.emplace_back(I.P * current_beta); diff --git a/Element/Membrane/PFM/DCP4.cpp b/Element/Membrane/PFM/DCP4.cpp index 4499f1943..2c1ab700c 100644 --- a/Element/Membrane/PFM/DCP4.cpp +++ b/Element/Membrane/PFM/DCP4.cpp @@ -154,7 +154,6 @@ int DCP4::reset_status() { vector DCP4::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); if(P == OutputType::NMISES) { mat A(int_pt.size(), 4); diff --git a/Element/Membrane/Plane/CP4.cpp b/Element/Membrane/Plane/CP4.cpp index 0d567d3ae..9ad36b2f0 100644 --- a/Element/Membrane/Plane/CP4.cpp +++ b/Element/Membrane/Plane/CP4.cpp @@ -376,7 +376,6 @@ mat CP4::compute_shape_function(const mat& coordinate, const unsigned order) con vector CP4::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); if(P == OutputType::NMISES) { mat A(int_pt.size(), 4); diff --git a/Element/Membrane/Plane/CP4I.cpp b/Element/Membrane/Plane/CP4I.cpp index 259947f59..62e872bc1 100644 --- a/Element/Membrane/Plane/CP4I.cpp +++ b/Element/Membrane/Plane/CP4I.cpp @@ -347,10 +347,7 @@ mat CP4I::compute_shape_function(const mat& coordinate, const unsigned order) co vector CP4I::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/Plane/CP5.cpp b/Element/Membrane/Plane/CP5.cpp index 9ed46b19c..0565fad67 100644 --- a/Element/Membrane/Plane/CP5.cpp +++ b/Element/Membrane/Plane/CP5.cpp @@ -178,10 +178,7 @@ mat CP5::compute_shape_function(const mat& coordinate, const unsigned order) con vector CP5::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/Plane/CP7.cpp b/Element/Membrane/Plane/CP7.cpp index bc6cf69d9..24d0de549 100644 --- a/Element/Membrane/Plane/CP7.cpp +++ b/Element/Membrane/Plane/CP7.cpp @@ -178,10 +178,7 @@ mat CP7::compute_shape_function(const mat& coordinate, const unsigned order) con vector CP7::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/Plane/CP8.cpp b/Element/Membrane/Plane/CP8.cpp index a8f01c3a1..6a4ae67f8 100644 --- a/Element/Membrane/Plane/CP8.cpp +++ b/Element/Membrane/Plane/CP8.cpp @@ -179,10 +179,7 @@ mat CP8::compute_shape_function(const mat& coordinate, const unsigned order) con vector CP8::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); - for(const auto& I : int_pt) for(const auto& J : I.m_material->record(P)) output.emplace_back(J); - return output; } diff --git a/Element/Membrane/Porous/PCPE4DC.cpp b/Element/Membrane/Porous/PCPE4DC.cpp index aa7f405df..6c7662918 100644 --- a/Element/Membrane/Porous/PCPE4DC.cpp +++ b/Element/Membrane/Porous/PCPE4DC.cpp @@ -187,7 +187,6 @@ mat PCPE4DC::compute_shape_function(const mat& coordinate, const unsigned order) vector PCPE4DC::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); if(P == OutputType::NMISES) { mat A(int_pt.size(), 4); diff --git a/Element/Membrane/Porous/PCPE4UC.cpp b/Element/Membrane/Porous/PCPE4UC.cpp index 2b46ac52c..019c69bad 100644 --- a/Element/Membrane/Porous/PCPE4UC.cpp +++ b/Element/Membrane/Porous/PCPE4UC.cpp @@ -152,7 +152,6 @@ mat PCPE4UC::compute_shape_function(const mat& coordinate, const unsigned order) vector PCPE4UC::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); if(P == OutputType::NMISES) { mat A(int_pt.size(), 4); diff --git a/Element/Membrane/Porous/PCPE8DC.cpp b/Element/Membrane/Porous/PCPE8DC.cpp index 73958ec3d..f2bb93972 100644 --- a/Element/Membrane/Porous/PCPE8DC.cpp +++ b/Element/Membrane/Porous/PCPE8DC.cpp @@ -187,7 +187,6 @@ mat PCPE8DC::compute_shape_function(const mat& coordinate, const unsigned order) vector PCPE8DC::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); if(P == OutputType::PP) { const auto t_disp = get_current_displacement(); diff --git a/Element/Membrane/Porous/PCPE8UC.cpp b/Element/Membrane/Porous/PCPE8UC.cpp index ab8c8cb61..c8f9e9782 100644 --- a/Element/Membrane/Porous/PCPE8UC.cpp +++ b/Element/Membrane/Porous/PCPE8UC.cpp @@ -152,7 +152,6 @@ mat PCPE8UC::compute_shape_function(const mat& coordinate, const unsigned order) vector PCPE8UC::record(const OutputType P) { vector output; - output.reserve(int_pt.size()); if(P == OutputType::PP) { const auto t_disp = get_current_displacement(); diff --git a/Element/Special/Joint.cpp b/Element/Special/Joint.cpp index 38f4d69fe..dd392d5a6 100644 --- a/Element/Special/Joint.cpp +++ b/Element/Special/Joint.cpp @@ -72,9 +72,7 @@ int Joint::reset_status() { vector Joint::record(const OutputType P) { vector data; - for(const auto& I : j_material) for(auto& J : I->record(P)) data.emplace_back(J); - return data; } diff --git a/Element/Visualisation/vtkBase.h b/Element/Visualisation/vtkBase.h index 433590dd7..96c39a07f 100644 --- a/Element/Visualisation/vtkBase.h +++ b/Element/Visualisation/vtkBase.h @@ -43,6 +43,7 @@ using arma::mat; class vtkBase { #ifdef SUANPAN_VTK + protected: vtkSmartPointer vtk_cell; diff --git a/Enhancement/suanPan.iss b/Enhancement/suanPan.iss index 42ac283a2..75d6eaedc 100644 --- a/Enhancement/suanPan.iss +++ b/Enhancement/suanPan.iss @@ -2,7 +2,7 @@ ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! #define MyAppName "suanPan" -#define MyAppVersion "2.6.1" +#define MyAppVersion "2.7" #define MyAppPublisher "Theodore Chang" #define MyAppURL "https://github.com/TLCFEM/suanPan" #define MyAppExeName "suanPan.exe" @@ -32,7 +32,7 @@ InfoAfterFile=.\POST.txt PrivilegesRequiredOverridesAllowed=dialog OutputDir={#StoragePath}\installer OutputBaseFilename=suanPan-win-mkl-vtk -SetupIconFile=..\Resource\suanPan-Papirus.ico +SetupIconFile=..\Resource\suanPan-3.ico Compression=lzma SolidCompression=yes WizardStyle=modern @@ -58,7 +58,6 @@ Source: "{#StoragePath}tbbmalloc.dll"; DestDir: "{app}"; Flags: ignoreversion Source: "{#StoragePath}tbbmalloc_proxy.dll"; DestDir: "{app}"; Flags: ignoreversion Source: "{#StoragePath}updater.exe"; DestDir: "{app}"; Flags: ignoreversion; Tasks: updater Source: "{#StoragePath}msvcp140.dll"; DestDir: "{app}"; Flags: ignoreversion; Tasks: vcredist -Source: "{#StoragePath}msvcp140_atomic_wait.dll"; DestDir: "{app}"; Flags: ignoreversion; Tasks: vcredist Source: "{#StoragePath}vcruntime140.dll"; DestDir: "{app}"; Flags: ignoreversion; Tasks: vcredist Source: "{#StoragePath}vcruntime140_1.dll"; DestDir: "{app}"; Flags: ignoreversion; Tasks: vcredist Source: ".\AddAssociation.bat"; DestDir: "{app}"; Flags: ignoreversion diff --git a/Enhancement/suanPan.sublime-completions b/Enhancement/suanPan.sublime-completions index 01d01accf..6ac7c0348 100644 --- a/Enhancement/suanPan.sublime-completions +++ b/Enhancement/suanPan.sublime-completions @@ -244,6 +244,7 @@ "AbsDisp", "AbsError", "AbsIncreDisp", + "AbsIncreAcc", "AbsIncreEnergy", "AbsResidual", "ArcLength", @@ -296,6 +297,8 @@ "Decay", "Dhakal", "Dynamic", + "ImplicitDynamic", + "ExplicitDynamic", "EU2D", "EU3D", "Eigen", @@ -556,6 +559,7 @@ "RelDisp", "RelError", "RelIncreDisp", + "RelIncreAcc", "RelIncreEnergy", "RelResidual", "Rotation2D", @@ -1798,17 +1802,35 @@ "trigger":"import" }, { - "contents":"integrator BatheTwoStep ${1:(1)}\n# (1) int, unique integrator tag", + "contents":"integrator Tchamwa ${1:(1)} ${2:(2)}\n# (1) int, unique integrator tag\n# [2] double, spectral radius, default: 0.5", + "details":"", + "kind":"type", + "trigger":"Tchamwa" + }, + { + "contents":"integrator BatheTwoStep ${1:(1)} ${2:(2)} ${3:(3)}\n# (1) int, unique integrator tag\n# [2] double, spectral radius, default: 0\n# [3] double, sub-step size, default: 0.5", "details":"", "kind":"type", "trigger":"BatheTwoStep" }, { - "contents":"integrator GSSSSOptimal ${1:(1)} ${2:(2)}\n# (1) int, unique integrator tag\n# (2) double, spectral radius", + "contents":"integrator BatheExplicit ${1:(1)} ${2:(2)}\n# (1) int, unique integrator tag\n# [2] double, spectral radius, default: 0.5", + "details":"", + "kind":"type", + "trigger":"BatheExplicit" + }, + { + "contents":"integrator GSSSSOptimal ${1:(1)} ${2:[2]}\n# (1) int, unique integrator tag\n# [2] double, spectral radius, default: 0.5", "details":"Optimal scheme for GSSSS", "kind":"type", "trigger":"GSSSSOptimal" }, + { + "contents":"integrator OALTS ${1:(1)} ${2:[2]}\n# (1) int, unique integrator tag\n# [2] double, spectral radius, default: 0.5", + "details":"OALTS doi:10.1002/nme.6188", + "kind":"type", + "trigger":"OALTS" + }, { "contents":"integrator GSSSSU0 ${1:(1)} ${2:(2)} ${3:(3)} ${4:(4)}\n# (1) int, unique integrator tag\n# (2-4) double, three spectral radii", "details":"U0 family scheme for GSSSS", @@ -1827,6 +1849,12 @@ "kind":"type", "trigger":"GeneralisedAlpha" }, + { + "contents":"integrator GeneralisedAlphaExplicit ${1:(1)} ${2:[2]}\n# (1) int, unique tag\n# [2] double, spectral radius at infinite frequency, default: 0.5", + "details":"", + "kind":"type", + "trigger":"GeneralisedAlphaExplicit" + }, { "contents":"integrator GeneralisedAlpha ${1:(1)} ${2:[2]}\n# (1) int, unique tag\n# [2] double, spectral radius at infinite frequency, default: 0.5\n\nintegrator GeneralisedAlpha ${1:(1)} ${2:(2)} ${3:(3)}\n# (1) int, unique tag\n# (2) double, \\alpha_f\n# (3) double, \\alpha_m", "details":"", @@ -1941,6 +1969,18 @@ "kind":"type", "trigger":"Elastic1D" }, + { + "contents":"material Sinh1D ${1:(1)} ${2:(2)} ${3:[3]}\n# (1) int, unique material tag\n# (2) double, elastic modulus\n# [3] double, density, default: 0.0", + "details":"Uniaxial Nonlinear Elastic Material Using Sinh", + "kind":"type", + "trigger":"Sinh1D" + }, + { + "contents":"material Tanh1D ${1:(1)} ${2:(2)} ${3:[3]}\n# (1) int, unique material tag\n# (2) double, elastic modulus\n# [3] double, density, default: 0.0", + "details":"Uniaxial Nonlinear Elastic Material Using Tanh", + "kind":"type", + "trigger":"Tanh1D" + }, { "contents":"material Flag01 ${1:(1)} ${2:(2)} ${3:(3)} ${4:(4)} ${5:(5)}\n# (1) int, unique material tag\n# (2) double, elastic modulus\n# (3) double, yield stress, positive\n# (4) double, residual stress, can be either positive or negative\n# (5) double, hardening ratio", "details":"", diff --git a/Enhancement/suanPan.sublime-syntax b/Enhancement/suanPan.sublime-syntax index 08121e43c..07e16f8df 100644 --- a/Enhancement/suanPan.sublime-syntax +++ b/Enhancement/suanPan.sublime-syntax @@ -18,7 +18,7 @@ contexts: - match: '\b(?i)(Allman|B21|B21EL|B21EH|B21H|B31|NMB21(EL|EH)|NMB31|C3D20|C3D4|C3D8|C3D8I|C3D8R|CAX3|CAX4|CAX8|CIN3D8|CINP4|Contact2D|Contact3D|CP3|CSMT3|CSMT6|CSMQ[4-8]|CP4I|CP[4-8]|Damper0[1-4]|DC3D4|DC3D8|DCP3|DCP4|DKT3|DKT4|DKTS3|DKTS4|EB21|F21|F21H|F31|GCMQ|SGCMQ|GQ12|Joint|Mass(Point)([2-3]D)|Membrane|Mindlin|MVLEM|NodeLine|NodeFacet|PatchCube|PatchQuad|PCPE4DC|PCPE4DI|PCPE8DC|PCPE4UC|PCPE8UC|PS|QE2|S4|SGCMS|SingleSection2D|SingleSection3D|Spring01|Spring02|T[2|3]D2S?|R[2|3]D2|Tie|Embed[2|3]D|S?GCMQ(I|L|G)?|(T|B)[2|3]D(L|C)|Embedded[2|3]D|Sleeve[2|3]D)\b' scope: storage # material - - match: '\b(?i)(AFC0[1-3]|AFCN|ArmstrongFrederick(1D)?|Axisymmetric(Elastic)?|Bilinear([1-2]D|CC|DP|J2|OO|PO|Peric|Hoffman)|BilinearElastic1D|NLE1D01|BilinearMises1D|BlatzKo|BoucWen|BWBN|CDP(M2(NO|ISO|ANISO)?)?|Concrete2[1-2]|Concrete(CM|Tsai|Exp)|CoulombFriction|DafaliasManzari|Degradation|Elastic(1|2)D|Exp(CC|DP|Gurson|Gurson1D|Hoffman|J2|Mises1D)|Flag0(1|2)|Fluid|IsotropicDamage|Isotropic(Nonlinear)?Elastic3D|Kelvin|Laminated|LinearDamage|Maxwell|Metal|Mises1D|MooneyRivlin|MPF|Multilinear((Elastic)?1D|J2|OO|PO)|NLE3D01|OrthotropicElastic3D|ParabolicCC|Parallel|PlaneStrain|PlaneSymmetric(1|2)3|PlaneStress|PolyJ2|RambergOsgood|Rebar[2|3]D|Sequential|SimpleSand|SlipLock|Stacked|SteelBRB|TableCDP|Trivial|Uniaxial|VAFCRP(1D)?|Viscosity0(1|2)|Yeoh|Elastic3D|TrilinearDegradation|TableGurson|Substepping|PolyElastic1D|Rotation[2|3]D|MultilinearMises1D|Gap01|Dhakal|ConcreteTable|BilinearViscosity)\b' + - match: '\b(?i)(AFC0[1-3]|AFCN|ArmstrongFrederick(1D)?|Axisymmetric(Elastic)?|Bilinear([1-2]D|CC|DP|J2|OO|PO|Peric|Hoffman)|BilinearElastic1D|NLE1D01|BilinearMises1D|BlatzKo|BoucWen|BWBN|CDP(M2(NO|ISO|ANISO)?)?|Concrete2[1-2]|Concrete(CM|Tsai|Exp)|CoulombFriction|DafaliasManzari|Degradation|Elastic(1|2)D|Exp(CC|DP|Gurson|Gurson1D|Hoffman|J2|Mises1D)|Flag0(1|2)|Fluid|IsotropicDamage|Isotropic(Nonlinear)?Elastic3D|Kelvin|Laminated|LinearDamage|Maxwell|Metal|Mises1D|MooneyRivlin|MPF|Multilinear((Elastic)?1D|J2|OO|PO)|NLE3D01|OrthotropicElastic3D|ParabolicCC|Parallel|PlaneStrain|PlaneSymmetric(1|2)3|PlaneStress|PolyJ2|RambergOsgood|Rebar[2|3]D|Sequential|SimpleSand|SlipLock|Stacked|SteelBRB|TableCDP|Trivial|Uniaxial|VAFCRP(1D)?|Viscosity0(1|2)|Yeoh|Elastic3D|TrilinearDegradation|TableGurson|Substepping|PolyElastic1D|Rotation[2|3]D|MultilinearMises1D|Gap01|Dhakal|ConcreteTable|BilinearViscosity|Sinh1D|Tanh1D)\b' scope: storage # section - match: '\b(?i)(Bar[2|3]D|Box[2|3]D|Circle[1-3]D|CircularHollow[2|3]D|EU[2|3]D|Fibre[1-3]D|HSection2D|ISection[2|3]D|NM[2|3]D[1-3]|NM[2|3]D3K|NZ[2|3]D|Rectangle[1-3]D|TrussSection|TSection[2-3]D|US[2|3]D)\b' @@ -32,7 +32,7 @@ contexts: - match: '\b(?i)(Hann|Hamming|Blackman|BlackmanNuttall|BlackmanHarris|FlatTop)\b' scope: string # other - - match: '\b(?i)(Abs(Error|IncreEnergy|Residual)|ArcLength|Logic(AND|OR|XOR)|BatheTwoStep|BFGS|Buckle|Combine|Constant|Converger|Decay|Dynamic|FEAST|FixedNumber|FixedLength[2|3]D|Min(imum)?Gap[2|3]D|Max(imum)?Gap[2|3]D|Frequency|Generali(s|z)edAlpha|GSSSS(U|V)0|GSSSSOptimal|LeeNewmark(Full)?|Linear|(Max|Min)Displacement|MaxHistory|(Max|Min)Resistance|Modulated|MPC|MPDC|m?Newton|NZStrongMotion|Optimization|OutputType|ParticleCollision[2|3]D|LJPotential2D|Ramm|(Rayleigh)?Newmark|(Rel|Abs)(Incre)?Disp|Rel(Error|IncreEnergy|Residual)|(Finite)RigidWall(Penalty|Multiplier)|(Finite)RestitutionWall(Penalty)|Static|StrainEnergyEvolution|Tabular(Spline)|Sine|Cosine|WilsonPenzienNewmark|LeeElementalNewmark)\b' + - match: '\b(?i)((Abs|Rel)(Error|IncreEnergy|Residual)|ArcLength|Logic(AND|OR|XOR)|OALTS|Bathe(TwoStep|Explicit)|BFGS|Buckle|Combine|Constant|Converger|Decay|(Implicit|Explicit)Dynamic|FEAST|FixedNumber|FixedLength[2|3]D|Min(imum)?Gap[2|3]D|Max(imum)?Gap[2|3]D|Frequency|Generali(s|z)edAlpha(Explicit)|GSSSS(U|V)0|GSSSSOptimal|LeeNewmark(Full)?|Linear|(Max|Min)Displacement|MaxHistory|(Max|Min)Resistance|Modulated|MPC|MPDC|m?Newton|NZStrongMotion|Optimization|OutputType|ParticleCollision[2|3]D|LJPotential2D|Ramm|(Rayleigh)?Newmark|Tchamwa|(Rel|Abs)(Incre)?(Disp|Acc)|(Finite)RigidWall(Penalty|Multiplier)|(Finite)RestitutionWall(Penalty)|Static|StrainEnergyEvolution|Tabular(Spline)|Sine|Cosine|WilsonPenzienNewmark|LeeElementalNewmark)\b' scope: string # output type - match: '\b(?i)(A[1-6]?|AR[1-3]?|AT|AXIAL|CSE|DAMAGE|DC|DF[1-6]?|DM[1-3]?|DT|E|E11|E12|E13|E22|E23|E33|ED|EE|EE11|EE12|EE13|EE22|EE23|EE33|EEEQ|EEP|EEP1|EEP2|EEP3|EEQ|EINT|EP[1-3]?|ES|HIST|HYDRO|IF[1-6]?|IM[1-3]{1}|K|KAPPAC|KAPPAP|KAPPAT|KE|LITR|M|MISES|MOMENT|MOMENTUM((R?)[XYZ])?|NL|NMISES|PE|PE11|PE12|PE13|PE22|PE23|PE33|PEEQ|PEP[1-3]?|PP|REBARE|REBARS|RESULTANT|RF[1-6]?|RM[1-3]?|RT|S|S11|S12|S13|S22|S23|S33|SD|SE|SHEAR|SINT|SINV|SP[1-3]?|SS|TORSION|TRESC|TSE|U[1-6]?|UR[1-3]?|UT|V[1-6]?|VD|VF|VR[1-3]?|VS|VT|GDF|YF|VE|BEAM[SE])\b' diff --git a/Enhancement/suanpan.nuspec b/Enhancement/suanpan.nuspec index 03442a32c..82143bf3d 100644 --- a/Enhancement/suanpan.nuspec +++ b/Enhancement/suanpan.nuspec @@ -2,7 +2,7 @@ suanpan - 2.6.1 + 2.7 https://github.com/TLCFEM/suanPan Theodore Chang suanPan diff --git a/Example/Element/Allman.supan b/Example/Element/Allman.supan index 0c102fa95..416793921 100644 --- a/Example/Element/Allman.supan +++ b/Example/Element/Allman.supan @@ -23,18 +23,18 @@ step static 1 analyze # Node 2: -# 0.5000 0 +# 0.5000 0 # Displacement: -# 0.3256 0.9286 1.8109 +# 0.3256 0.9286 1.8109 # Resistance: -# 7.5862e-09 1.0000e+00 -1.0345e-09 +# 7.5862e-09 1.0000e+00 -1.0345e-09 # # Node 3: -# 0.5000 0.5000 +# 0.5000 0.5000 # Displacement: -# -0.4379 0.8795 1.3261 +# -0.4379 0.8795 1.3261 # Resistance: -# -6.2069e-09 1.0000e+00 -2.1839e-09 +# -6.2069e-09 1.0000e+00 -2.1839e-09 peek node 2 3 peek element 1 diff --git a/Example/Element/C3D20.supan b/Example/Element/C3D20.supan index 9626b7848..701dbe184 100644 --- a/Example/Element/C3D20.supan +++ b/Example/Element/C3D20.supan @@ -241,7 +241,7 @@ converger RelIncreDisp 1 1E-10 7 1 analyze -# -6.7782 7.3797 -0.1144 +# -6.7782 7.3797 -0.1144 peek node 12 peek element 1 diff --git a/Example/Element/C3D20NL.supan b/Example/Element/C3D20NL.supan index 0af327258..ed0380fea 100644 --- a/Example/Element/C3D20NL.supan +++ b/Example/Element/C3D20NL.supan @@ -45,11 +45,11 @@ converger RelIncreDisp 1 1E-12 50 1 analyze # Node 7: -# 2.0000 2.0000 2.0000 +# 2.0000 2.0000 2.0000 # Displacement: -# -0.2317 -0.2317 1.0000 +# -0.2317 -0.2317 1.0000 # Resistance: -# 9.4141e-15 5.1640e-15 -4.6875e+01 +# 9.4141e-15 5.1640e-15 -4.6875e+01 peek node 7 peek element 1 diff --git a/Example/Element/C3D4.supan b/Example/Element/C3D4.supan index c67f772c7..2974efac4 100644 --- a/Example/Element/C3D4.supan +++ b/Example/Element/C3D4.supan @@ -2198,8 +2198,8 @@ fix2 1 P 1 2 5 8 9 10 11 12 35 36 37 38 60 61 62 63 73 74 75 76 201 202 203 204 displacement 1 0 .5 1 3 4 6 7 22 23 24 25 48 49 50 51 52 53 54 55 56 57 58 59 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 step static 1 -set fixed_step_size 1 -set ini_step_size 1 +set fixed_step_size 0 +set ini_step_size 1E-1 set system_solver Spike converger RelIncreDisp 1 1E-8 4 1 @@ -2207,32 +2207,36 @@ converger RelIncreDisp 1 1E-8 4 1 analyze # Node 4: +# Coordinate: # 1.0000 1.0000 0 # Displacement: # 0.5000 0.0039 0.1690 # Resistance: -# 1.9400E-02 -1.1362E-15 -3.3307E-16 +# 1.9400e-02 2.5761e-16 -1.3852e-15 # # Node 7: +# Coordinate: # 0 1.0000 0 # Displacement: -# 5.0000E-01 3.0289E-05 -1.5806E-01 +# 5.0000e-01 3.0289e-05 -1.5806e-01 # Resistance: -# 1.3049E-01 2.7730E-15 -7.4246E-16 +# 1.3049e-01 1.7152e-16 5.1348e-16 # # Node 3: +# Coordinate: # 1.0000 0 0 # Displacement: -# 5.0000E-01 4.5624E-05 1.5947E-01 +# 5.0000e-01 4.5624e-05 1.5947e-01 # Resistance: -# 1.2906E-01 -2.0687E-15 1.3878E-15 +# 1.2906e-01 1.2782e-15 8.5348e-16 # # Node 6: +# Coordinate: # 0 0 0 # Displacement: # 0.5000 0.0037 -0.1672 # Resistance: -# 4.0423E-02 -2.0210E-16 -2.0001E-15 +# 4.0423e-02 5.0068e-16 -4.9613e-16 peek node 4 7 3 6 peek element 1 diff --git a/Example/Element/C3D4NL.supan b/Example/Element/C3D4NL.supan index 61996d6c4..9cd96de19 100644 --- a/Example/Element/C3D4NL.supan +++ b/Example/Element/C3D4NL.supan @@ -57,11 +57,11 @@ converger RelIncreDisp 1 1E-12 7 1 analyze # Node 14: -# 0.5000 0.5000 1.0000 +# 0.5000 0.5000 1.0000 # Displacement: -# -0.0225 0.0225 0.2000 +# -0.0225 0.0225 0.2000 # Resistance: -# 1.6725e-15 -7.0979e-16 8.8000e+01 +# 1.6725e-15 -7.0979e-16 8.8000e+01 peek node 14 reset diff --git a/Example/Element/C3D8R.supan b/Example/Element/C3D8R.supan index db7da4169..fa3205694 100644 --- a/Example/Element/C3D8R.supan +++ b/Example/Element/C3D8R.supan @@ -41,18 +41,18 @@ analyze peek element 1 # Node 13: -# 0.5000 -0.5000 2.5000 +# 0.5000 -0.5000 2.5000 # Displacement: -# -0.0068 0 0.1000 +# -0.0068 0 0.1000 # Resistance: -# -1.0270e-15 -8.0838e-16 8.7546e+00 +# -1.0270e-15 -8.0838e-16 8.7546e+00 # # Node 14: -# 0.5000 0.5000 2.5000 +# 0.5000 0.5000 2.5000 # Displacement: -# -0.0068 -0.0068 0.1000 +# -0.0068 -0.0068 0.1000 # Resistance: -# -1.0304e-15 -8.3961e-16 8.7546e+00 +# -1.0304e-15 -8.3961e-16 8.7546e+00 peek node 13 14 reset diff --git a/Example/Element/CAX3.supan b/Example/Element/CAX3.supan index d9d3e9097..e533416ca 100644 --- a/Example/Element/CAX3.supan +++ b/Example/Element/CAX3.supan @@ -20,18 +20,18 @@ step static 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 2.3873e-02 -1.3401e-18 +# 2.3873e-02 -1.3401e-18 # Resistance: -# 1.0000e+00 -2.9066e-17 +# 1.0000e+00 -2.9066e-17 # # Node 3: -# 1.0000 1.0000 +# 1.0000 1.0000 # Displacement: -# 0.0239 -0.0053 +# 0.0239 -0.0053 # Resistance: -# 1.0000e+00 2.4111e-17 +# 1.0000e+00 2.4111e-17 peek node 2 3 peek element 1 diff --git a/Example/Element/CAX4.supan b/Example/Element/CAX4.supan index 7886e73fb..60cd9221d 100644 --- a/Example/Element/CAX4.supan +++ b/Example/Element/CAX4.supan @@ -34,25 +34,25 @@ converger RelIncreDisp 1 1E-10 20 1 analyze # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0.0024 0 +# 0.0024 0 # Resistance: -# -6.0779e-08 4.5099e+00 +# -6.0779e-08 4.5099e+00 # # Node 6: -# 2.0000 1.0000 +# 2.0000 1.0000 # Displacement: -# 0.0024 -0.0025 +# 0.0024 -0.0025 # Resistance: -# -4.6574e-08 -6.0838e-08 +# -4.6574e-08 -6.0838e-08 # # Node 9: -# 2.0000 2.0000 +# 2.0000 2.0000 # Displacement: -# 0.0024 -0.0050 +# 0.0024 -0.0050 # Resistance: -# 1.7420e-15 -4.5099e+00 +# 1.7420e-15 -4.5099e+00 peek node 3 6 9 peek element 1 diff --git a/Example/Element/CAX8.supan b/Example/Element/CAX8.supan index b8419bff3..33a52e67a 100644 --- a/Example/Element/CAX8.supan +++ b/Example/Element/CAX8.supan @@ -29,25 +29,25 @@ analyze peek element 1 # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0.0200 0 +# 0.0200 0 # Resistance: -# -5.7953e-17 5.0265e+00 +# -5.7953e-17 5.0265e+00 # # Node 6: -# 2.0000 1.0000 +# 2.0000 1.0000 # Displacement: -# 0.0200 -0.1000 +# 0.0200 -0.1000 # Resistance: -# -5.7096e-18 -8.8818e-16 +# -5.7096e-18 -8.8818e-16 # # Node 9: -# 2.0000 2.0000 +# 2.0000 2.0000 # Displacement: -# 0.0200 -0.2000 +# 0.0200 -0.2000 # Resistance: -# -3.9183e-16 -5.0265e+00 +# -3.9183e-16 -5.0265e+00 peek node 3 6 9 reset diff --git a/Example/Element/CIN3D8.supan b/Example/Element/CIN3D8.supan index 4341fe4dd..79ba0214b 100644 --- a/Example/Element/CIN3D8.supan +++ b/Example/Element/CIN3D8.supan @@ -43,11 +43,11 @@ analyze peek element 3 # Node 1: -# 0.5000 -0.5000 -0.5000 +# 0.5000 -0.5000 -0.5000 # Displacement: -# 0.0188 -0.0188 0.5000 +# 0.0188 -0.0188 0.5000 # Resistance: -# 2.8950e-15 -1.9344e-15 4.7108e+01 +# 2.8950e-15 -1.9344e-15 4.7108e+01 peek node 1 reset diff --git a/Example/Element/CINP4.supan b/Example/Element/CINP4.supan index 600fa26c6..d2bb86924 100644 --- a/Example/Element/CINP4.supan +++ b/Example/Element/CINP4.supan @@ -33,18 +33,18 @@ analyze save recorder 1 2 # Node 1: -# 0 0 0 +# 0 0 0 # Displacement: -# 0.0071 0 +# 0.0071 0 # Resistance: -# 1.0000 0.2000 +# 1.0000 0.2000 # # Node 4: -# 0 1.0000 0 +# 0 1.0000 0 # Displacement: -# 0.0071 0 +# 0.0071 0 # Resistance: -# 1.0000 -0.2000 +# 1.0000 -0.2000 peek node 1 4 peek element 4 diff --git a/Example/Element/CP4NL.supan b/Example/Element/CP4NL.supan index 10a0c3cc3..1055e5872 100644 --- a/Example/Element/CP4NL.supan +++ b/Example/Element/CP4NL.supan @@ -169,11 +169,11 @@ converger RelIncreDisp 1 1E-12 10 1 analyze # Node 83: -# 4.0000 0.5000 +# 4.0000 0.5000 # Displacement: -# -0.1456 0.9790 +# -0.1456 0.9790 # Resistance: -# 1.6098e-14 1.0000e+00 +# 1.6098e-14 1.0000e+00 peek node 83 peek element 1 diff --git a/Example/Element/CP4R.supan b/Example/Element/CP4R.supan index 4136fff5c..38b976659 100644 --- a/Example/Element/CP4R.supan +++ b/Example/Element/CP4R.supan @@ -167,11 +167,11 @@ converger RelIncreDisp 1 1E-10 7 1 analyze # Node 83: -# 4.0000 0.5000 +# 4.0000 0.5000 # Displacement: -# -1.5148e-16 5.6591e+00 +# -1.5148e-16 5.6591e+00 # Resistance: -# 8.8818e-16 2.5000e-01 +# 8.8818e-16 2.5000e-01 peek node 83 exit \ No newline at end of file diff --git a/Example/Element/CP5.supan b/Example/Element/CP5.supan index b11bca687..3e8a77220 100644 --- a/Example/Element/CP5.supan +++ b/Example/Element/CP5.supan @@ -25,18 +25,18 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 3: -# 1.0000 1.0000 +# 1.0000 1.0000 # Displacement: -# -0.0002 0.0007 +# -0.0002 0.0007 # Resistance: -# 2.7602e-09 1.0000e+00 +# 2.7602e-09 1.0000e+00 # # Node 4: -# 0 1.0000 +# 0 1.0000 # Displacement: -# -3.8935e-13 6.6633e-04 +# -3.8935e-13 6.6633e-04 # Resistance: -# -2.7602e-09 1.0000e+00 +# -2.7602e-09 1.0000e+00 peek node 3 4 peek element 1 diff --git a/Example/Element/CP7.supan b/Example/Element/CP7.supan index 077f75714..9ebee930f 100644 --- a/Example/Element/CP7.supan +++ b/Example/Element/CP7.supan @@ -27,18 +27,18 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 1: -# 0 0 +# 0 0 # Displacement: -# 6.6700e-04 5.5689e-13 +# 6.6700e-04 5.5689e-13 # Resistance: -# 1.0000e+00 1.2778e-09 +# 1.0000e+00 1.2778e-09 # # Node 4: -# 0 1.0000 +# 0 1.0000 # Displacement: -# 0.0007 0.0002 +# 0.0007 0.0002 # Resistance: -# 1.0000e+00 -1.2778e-09 +# 1.0000e+00 -1.2778e-09 peek node 1 4 peek element 1 diff --git a/Example/Element/CSMQ4.supan b/Example/Element/CSMQ4.supan index 4e13f6e14..2cfe8c5ae 100644 --- a/Example/Element/CSMQ4.supan +++ b/Example/Element/CSMQ4.supan @@ -36,18 +36,18 @@ step static 1 analyze # Node 6: -# 10.0000 0 +# 10.0000 0 # Displacement: -# 9.8675 67.7433 9.9382 +# 9.8675 67.7433 9.9382 # Resistance: -# 2.0535e-12 1.5000e+02 -1.4866e-13 +# 2.0535e-12 1.5000e+02 -1.4866e-13 # # Node 12: -# 10.0000 2.0000 +# 10.0000 2.0000 # Displacement: -# -9.8675 67.7433 9.9382 +# -9.8675 67.7433 9.9382 # Resistance: -# -6.3949e-13 1.5000e+02 3.2729e-13 +# -6.3949e-13 1.5000e+02 3.2729e-13 peek node 6 12 peek element 1 diff --git a/Example/Element/CSMQ7.supan b/Example/Element/CSMQ7.supan index eb335f8bf..105329196 100644 --- a/Example/Element/CSMQ7.supan +++ b/Example/Element/CSMQ7.supan @@ -27,18 +27,18 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 1: -# 0 0 +# 0 0 # Displacement: -# 6.6667e-04 9.2946e-19 9.7766e-20 +# 6.6667e-04 9.2946e-19 9.7766e-20 # Resistance: -# 1.0000e+00 -4.5103e-17 -1.4831e-16 +# 1.0000e+00 -4.5103e-17 -1.4831e-16 # # Node 4: -# 0 1.0000 +# 0 1.0000 # Displacement: -# 6.6667e-04 1.6667e-04 -2.0224e-18 +# 6.6667e-04 1.6667e-04 -2.0224e-18 # Resistance: -# 1.0000e+00 1.3878e-17 -1.3896e-16 +# 1.0000e+00 1.3878e-17 -1.3896e-16 peek node 1 4 peek element 1 diff --git a/Example/Element/CSMQ8.supan b/Example/Element/CSMQ8.supan index ba98653d5..5eafd8339 100644 --- a/Example/Element/CSMQ8.supan +++ b/Example/Element/CSMQ8.supan @@ -168,11 +168,11 @@ step static 1 analyze # Node 117: -# 20.0000 5.0000 +# 20.0000 5.0000 # Displacement: -# -1.0030e-12 3.7835e+00 3.0374e-01 +# -1.0030e-12 3.7835e+00 3.0374e-01 # Resistance: -# -1.8957e-14 2.0000e+00 4.3889e-15 +# -1.8957e-14 2.0000e+00 4.3889e-15 peek node 117 peek element 1 diff --git a/Example/Element/CSMT3.supan b/Example/Element/CSMT3.supan index 01b8a0c27..340c17c64 100644 --- a/Example/Element/CSMT3.supan +++ b/Example/Element/CSMT3.supan @@ -21,18 +21,18 @@ step static 1 analyze # Node 2: -# 0.5000 0 +# 0.5000 0 # Displacement: -# 0.0828 0.4881 0.9366 +# 0.0828 0.4881 0.9366 # Resistance: -# 1.0784e-08 1.0000e+00 -2.5547e-09 +# 1.0784e-08 1.0000e+00 -2.5547e-09 # # Node 3: -# 0.5000 0.5000 +# 0.5000 0.5000 # Displacement: -# -0.0995 0.5055 0.8907 +# -0.0995 0.5055 0.8907 # Resistance: -# -9.9193e-09 1.0000e+00 1.3605e-09 +# -9.9193e-09 1.0000e+00 1.3605e-09 peek node 2 3 peek element 1 diff --git a/Example/Element/CSMT6.supan b/Example/Element/CSMT6.supan index 43ad1c417..2b791ad96 100644 --- a/Example/Element/CSMT6.supan +++ b/Example/Element/CSMT6.supan @@ -24,18 +24,18 @@ step static 1 analyze # Node 2: -# 0.5000 0 +# 0.5000 0 # Displacement: -# 0 0 0 +# 0 0 0 # Resistance: -# -0.3687 0.8794 0.1169 +# -0.3687 0.8794 0.1169 # # Node 3: -# 0.5000 0.5000 +# 0.5000 0.5000 # Displacement: -# 0.7454 -0.2755 -1.4696 +# 0.7454 -0.2755 -1.4696 # Resistance: -# 2.0000e+00 5.4801e-13 -1.9798e-10 +# 2.0000e+00 5.4801e-13 -1.9798e-10 peek node 2 3 peek element 1 diff --git a/Example/Element/Contact2D.supan b/Example/Element/Contact2D.supan index cd8bddad1..da513b2e2 100644 --- a/Example/Element/Contact2D.supan +++ b/Example/Element/Contact2D.supan @@ -32,11 +32,11 @@ converger RelIncreDisp 2 1E-11 20 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 5.6389e-19 -2.0000e-01 -3.0000e-01 +# 5.6389e-19 -2.0000e-01 -3.0000e-01 # Resistance: -# 6.7667e-17 -8.0000e+00 6.0000e-08 +# 6.7667e-17 -8.0000e+00 6.0000e-08 peek node 2 5 6 peek group 1 2 diff --git a/Example/Element/DC3D4.supan b/Example/Element/DC3D4.supan index 6e9f79fda..2c2167d1e 100644 --- a/Example/Element/DC3D4.supan +++ b/Example/Element/DC3D4.supan @@ -2200,7 +2200,7 @@ displacement 1 0 .5 1 3 4 6 7 22 23 24 25 48 49 50 51 52 53 54 55 56 57 58 59 14 step static 1 set fixed_step_size 1 -set ini_step_size 1E-2 +set ini_step_size 1E-1 set sparse_mat 0 converger RelIncreDisp 1 1E-8 4 1 diff --git a/Example/Element/DCP4.supan b/Example/Element/DCP4.supan index 066ac1421..4b0c838d2 100644 --- a/Example/Element/DCP4.supan +++ b/Example/Element/DCP4.supan @@ -2150,7 +2150,7 @@ groupdisplacement 2 0 1 2 3 step static 1 1 set fixed_step_size 1 -set ini_step_size 1E-2 +set ini_step_size 1E-1 converger RelIncreDisp 1 1E-10 20 1 diff --git a/Example/Element/DKT3.supan b/Example/Element/DKT3.supan index 353b14f27..669390c09 100644 --- a/Example/Element/DKT3.supan +++ b/Example/Element/DKT3.supan @@ -334,11 +334,11 @@ step static 1 analyze # Node 2: -# 0.5000 0.5000 0 +# 0.5000 0.5000 0 # Displacement: -# 5.7269e-03 -4.6327e-17 -1.1161e-17 +# 5.7269e-03 -4.6327e-17 -1.1161e-17 # Resistance: -# 1.0000e+00 2.2547e-15 3.5388e-16 +# 1.0000e+00 2.2547e-15 3.5388e-16 peek node 2 peek element 1 diff --git a/Example/Element/DKT4.supan b/Example/Element/DKT4.supan index aaf6f7e3e..02ff1ca94 100644 --- a/Example/Element/DKT4.supan +++ b/Example/Element/DKT4.supan @@ -63,11 +63,11 @@ converger RelIncreDisp 1 1E-8 2 1 analyze # Node 21: -# 0.5000 0.5000 +# 0.5000 0.5000 # Displacement: -# -6.3745e-03 1.1526e-17 -9.7535e-18 +# -6.3745e-03 1.1526e-17 -9.7535e-18 # Resistance: -# -1.0000e+00 6.9389e-17 -6.9389e-18 +# -1.0000e+00 6.9389e-17 -6.9389e-18 peek node 21 peek element 1 diff --git a/Example/Element/DKT48.supan b/Example/Element/DKT48.supan index 95a106b07..8c887f635 100644 --- a/Example/Element/DKT48.supan +++ b/Example/Element/DKT48.supan @@ -164,11 +164,11 @@ converger RelIncreDisp 1 1E-8 2 1 analyze # Node 57: -# 0.5000 0.5000 +# 0.5000 0.5000 # Displacement: -# -5.8829e-03 1.0960e-17 -5.6851e-18 +# -5.8829e-03 1.0960e-17 -5.6851e-18 # Resistance: -# -1.0000e+00 -2.7756e-17 3.3480e-16 +# -1.0000e+00 -2.7756e-17 3.3480e-16 peek node 57 exit \ No newline at end of file diff --git a/Example/Element/DKTS3.supan b/Example/Element/DKTS3.supan index 5fadada17..243310b37 100644 --- a/Example/Element/DKTS3.supan +++ b/Example/Element/DKTS3.supan @@ -334,11 +334,11 @@ step static 1 analyze # Node 2: -# 0.5000 0.5000 0 +# 0.5000 0.5000 0 # Displacement: -# 5.7269e-03 -4.6327e-17 -1.1161e-17 +# 5.7269e-03 -4.6327e-17 -1.1161e-17 # Resistance: -# 1.0000e+00 2.2547e-15 3.5388e-16 +# 1.0000e+00 2.2547e-15 3.5388e-16 peek node 2 peek element 1 diff --git a/Example/Element/Damper01.supan b/Example/Element/Damper01.supan index 5825dacfc..ece405164 100644 --- a/Example/Element/Damper01.supan +++ b/Example/Element/Damper01.supan @@ -26,7 +26,7 @@ plainrecorder 1 Element S 2 plainrecorder 2 Element E 2 plainrecorder 3 Element V 2 -step dynamic 1 20 +step dynamic 1 2 set ini_step_size .01 set fixed_step_size 1 @@ -38,6 +38,19 @@ analyze peek element 2 3 +# Node 2: +# Coordinate: +# 1.0000 0 +# Displacement: +# -0.1894 0 +# Resistance: +# -6.9652 0 +# Velocity: +# -0.4712 0 +# Acceleration: +# 6.9652 0 +peek node 2 + reset clear exit \ No newline at end of file diff --git a/Example/Element/Damper02.supan b/Example/Element/Damper02.supan index 404ad9d2d..ce9a6ef5c 100644 --- a/Example/Element/Damper02.supan +++ b/Example/Element/Damper02.supan @@ -22,7 +22,7 @@ amplitude Combine 1 2 3 # acceleration 2 1 1 1 2 displacement 2 1 1 1 2 -step dynamic 1 20 +step dynamic 1 2 set ini_step_size .01 set fixed_step_size 1 @@ -34,6 +34,19 @@ analyze peek element 2 3 +# Node 2: +# Coordinate: +# 1.0000 0 +# Displacement: +# -9.7700e-15 0 +# Resistance: +# 86.7135 0 +# Velocity: +# 1.2571e+02 0 +# Acceleration: +# -1.6882e-07 0 +peek node 2 + reset clear exit \ No newline at end of file diff --git a/Example/Element/Damper03.supan b/Example/Element/Damper03.supan index 9e30fd0c4..2e30b7c44 100644 --- a/Example/Element/Damper03.supan +++ b/Example/Element/Damper03.supan @@ -27,7 +27,7 @@ plainrecorder 1 Element S 2 plainrecorder 2 Element E 2 plainrecorder 3 Element V 2 -step dynamic 1 20 +step dynamic 1 2 set ini_step_size .01 set fixed_step_size 1 @@ -39,6 +39,19 @@ analyze peek element 2 3 +# Node 2: +# Coordinate: +# 1.0000 0 0 +# Displacement: +# -0.1894 0 0 +# Resistance: +# -6.9652 0 0 +# Velocity: +# -0.4712 0 0 +# Acceleration: +# 6.9652 0 0 +peek node 2 + reset clear exit \ No newline at end of file diff --git a/Example/Element/Damper04.supan b/Example/Element/Damper04.supan index 47c1f37f9..4d207776c 100644 --- a/Example/Element/Damper04.supan +++ b/Example/Element/Damper04.supan @@ -22,7 +22,7 @@ amplitude Combine 1 2 3 # acceleration 2 1 1 1 2 displacement 2 1 1 1 2 -step dynamic 1 20 +step dynamic 1 2 set ini_step_size .01 set fixed_step_size 1 @@ -34,6 +34,19 @@ analyze peek element 2 3 +# Node 2: +# Coordinate: +# 1.0000 0 0 +# Displacement: +# -9.7700e-15 0 0 +# Resistance: +# 86.7135 0 0 +# Velocity: +# 1.2571e+02 0 0 +# Acceleration: +# -1.6882e-07 0 0 +peek node 2 + reset clear exit \ No newline at end of file diff --git a/Example/Element/EB21.supan b/Example/Element/EB21.supan index dd15c3c7d..4a4f4b52c 100644 --- a/Example/Element/EB21.supan +++ b/Example/Element/EB21.supan @@ -29,18 +29,18 @@ converger RelIncreDisp 1 1E-10 5 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0 0.2167 0.3250 +# 0 0.2167 0.3250 # Resistance: -# 0 1.0000e+01 -6.5001e-08 +# 0 1.0000e+01 -6.5001e-08 # # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0 0.1167 -0.1750 +# 0 0.1167 -0.1750 # Resistance: -# 0 8.7490e-08 3.4999e-08 +# 0 8.7490e-08 3.4999e-08 peek node 2 3 peek element 1 diff --git a/Example/Element/GCMQ.supan b/Example/Element/GCMQ.supan index 0830daff2..cc3860a22 100644 --- a/Example/Element/GCMQ.supan +++ b/Example/Element/GCMQ.supan @@ -70,7 +70,7 @@ step static 1 analyze -# 20.0424 99.2251 18.1661 +# 20.0424 99.2251 18.1661 peek node 6 peek element 1 diff --git a/Example/Element/IP.supan b/Example/Element/IP.supan index 316992b8b..f5b346625 100644 --- a/Example/Element/IP.supan +++ b/Example/Element/IP.supan @@ -58,7 +58,7 @@ converger RelIncreDisp 1 1E-8 5 1 analyze -# -0.0095 0.1948 0.2913 +# -0.0095 0.1948 0.2913 peek node 2 peek element 1 diff --git a/Example/Element/Joint.supan b/Example/Element/Joint.supan index e2aefb9bc..038d17680 100644 --- a/Example/Element/Joint.supan +++ b/Example/Element/Joint.supan @@ -25,18 +25,18 @@ converger AbsIncreDisp 1 1E-8 3 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0 0.2667 0.4000 +# 0 0.2667 0.4000 # Resistance: -# 0 1.0000e+01 8.8818e-16 +# 0 1.0000e+01 8.8818e-16 # # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0 0.0667 -0.1000 +# 0 0.0667 -0.1000 # Resistance: -# 0 -4.4409e-16 -2.2204e-16 +# 0 -4.4409e-16 -2.2204e-16 peek node 2 3 peek element 1 diff --git a/Example/Element/MassPoint.supan b/Example/Element/MassPoint.supan index b939e8f8d..a6cde3ab1 100644 --- a/Example/Element/MassPoint.supan +++ b/Example/Element/MassPoint.supan @@ -26,7 +26,7 @@ plainrecorder 1 Element S 2 plainrecorder 2 Element E 2 plainrecorder 3 Element V 2 -step dynamic 1 20 +step dynamic 1 2 set ini_step_size .01 set fixed_step_size 1 @@ -38,6 +38,19 @@ analyze peek element 2 3 +# Node 2: +# Coordinate: +# 1.0000 0 +# Displacement: +# -0.1894 0 +# Resistance: +# -6.9652 0 +# Velocity: +# -0.4712 0 +# Acceleration: +# 6.9652 0 +peek node 2 + reset clear exit \ No newline at end of file diff --git a/Example/Element/PCPE4DC.supan b/Example/Element/PCPE4DC.supan index 6c20e4511..a08731b4a 100644 --- a/Example/Element/PCPE4DC.supan +++ b/Example/Element/PCPE4DC.supan @@ -30,18 +30,18 @@ set fixed_step_size true analyze # Node 3: -# 2.0000 2.0000 +# 2.0000 2.0000 # Displacement: -# 0 -0.2000 0 -0.2000 +# 0 -0.2000 0 -0.2000 # Resistance: -# -8.0000e+01 -2.0000e+02 -8.0000e+01 -8.0000e+01 +# -8.0000e+01 -2.0000e+02 -8.0000e+01 -8.0000e+01 # # Node 4: -# 0 2.0000 +# 0 2.0000 # Displacement: -# 0 -0.2000 0 -0.2000 +# 0 -0.2000 0 -0.2000 # Resistance: -# 8.0000e+01 -2.0000e+02 8.0000e+01 -8.0000e+01 +# 8.0000e+01 -2.0000e+02 8.0000e+01 -8.0000e+01 peek node 3 4 peek element 1 diff --git a/Example/Element/PCPE4UC.supan b/Example/Element/PCPE4UC.supan index 43bede0d2..58644adcb 100644 --- a/Example/Element/PCPE4UC.supan +++ b/Example/Element/PCPE4UC.supan @@ -26,18 +26,18 @@ set fixed_step_size true analyze # Node 3: -# 2.0000 2.0000 +# 2.0000 2.0000 # Displacement: -# 0 -0.2000 +# 0 -0.2000 # Resistance: -# -1.6000e+02 -2.8000e+02 +# -1.6000e+02 -2.8000e+02 # # Node 4: -# 0 2.0000 +# 0 2.0000 # Displacement: -# 0 -0.2000 +# 0 -0.2000 # Resistance: -# 1.6000e+02 -2.8000e+02 +# 1.6000e+02 -2.8000e+02 peek node 3 4 peek element 1 diff --git a/Example/Element/PCPE8DC.supan b/Example/Element/PCPE8DC.supan index 7a094334b..80e9c249b 100644 --- a/Example/Element/PCPE8DC.supan +++ b/Example/Element/PCPE8DC.supan @@ -31,25 +31,25 @@ set fixed_step_size true analyze # Node 3: -# 2.0000 2.0000 +# 2.0000 2.0000 # Displacement: -# 0 -0.2000 0 0 +# 0 -0.2000 0 0 # Resistance: -# -21.3333 -61.3333 -5.3333 -5.3333 +# -21.3333 -61.3333 -5.3333 -5.3333 # # Node 7: -# 1.0000 2.0000 +# 1.0000 2.0000 # Displacement: -# 9.7653e-19 -2.0000e-01 0 0 +# 9.7653e-19 -2.0000e-01 0 0 # Resistance: -# -1.4211e-14 -2.4533e+02 2.5264e-15 -2.1333e+01 +# -1.4211e-14 -2.4533e+02 2.5264e-15 -2.1333e+01 # # Node 4: -# 0 2.0000 +# 0 2.0000 # Displacement: -# 0 -0.2000 0 0 +# 0 -0.2000 0 0 # Resistance: -# 21.3333 -61.3333 5.3333 -5.3333 +# 21.3333 -61.3333 5.3333 -5.3333 peek node 3 7 4 peek element 1 diff --git a/Example/Element/PCPE8UC.supan b/Example/Element/PCPE8UC.supan index 3a2bf8c7a..bcbacdc1c 100644 --- a/Example/Element/PCPE8UC.supan +++ b/Example/Element/PCPE8UC.supan @@ -29,25 +29,25 @@ set fixed_step_size true analyze # Node 3: -# 2.0000 2.0000 +# 2.0000 2.0000 # Displacement: -# 0 -0.2000 +# 0 -0.2000 # Resistance: -# -53.3333 -93.3333 +# -53.3333 -93.3333 # # Node 7: -# 1.0000 2.0000 +# 1.0000 2.0000 # Displacement: -# 4.0135e-18 -2.0000e-01 +# 4.0135e-18 -2.0000e-01 # Resistance: -# 2.9729e-14 -3.7333e+02 +# 2.9729e-14 -3.7333e+02 # # Node 4: -# 0 2.0000 +# 0 2.0000 # Displacement: -# 0 -0.2000 +# 0 -0.2000 # Resistance: -# 53.3333 -93.3333 +# 53.3333 -93.3333 peek node 3 7 4 peek element 1 diff --git a/Example/Element/PatchCube.supan b/Example/Element/PatchCube.supan index 2dc3b1d7a..893b3ab81 100644 --- a/Example/Element/PatchCube.supan +++ b/Example/Element/PatchCube.supan @@ -29,32 +29,32 @@ analyze peek element 1 # Node 5: -# 0.5000 -0.5000 0.5000 1.0000 +# 0.5000 -0.5000 0.5000 1.0000 # Displacement: -# -0.0231 -0.0260 -0.0167 +# -0.0231 -0.0260 -0.0167 # Resistance: -# 8.6222e-10 -4.4714e-08 4.3957e-08 +# 8.6222e-10 -4.4714e-08 4.3957e-08 # # Node 6: -# 0.5000 0.5000 0.5000 1.0000 +# 0.5000 0.5000 0.5000 1.0000 # Displacement: -# -0.0370 -0.0370 0.1000 +# -0.0370 -0.0370 0.1000 # Resistance: -# -1.7522e-09 -1.7522e-09 1.3990e+01 +# -1.7522e-09 -1.7522e-09 1.3990e+01 # # Node 7: -# -0.5000 0.5000 0.5000 1.0000 +# -0.5000 0.5000 0.5000 1.0000 # Displacement: -# -0.0260 -0.0231 -0.0167 +# -0.0260 -0.0231 -0.0167 # Resistance: -# -4.4714e-08 8.6222e-10 4.3957e-08 +# -4.4714e-08 8.6222e-10 4.3957e-08 # # Node 8: -# -0.5000 -0.5000 0.5000 1.0000 +# -0.5000 -0.5000 0.5000 1.0000 # Displacement: -# -0.0265 -0.0265 -0.0125 +# -0.0265 -0.0265 -0.0125 # Resistance: -# -2.1847e-08 -2.1847e-08 4.4958e-08 +# -2.1847e-08 -2.1847e-08 4.4958e-08 peek node 5 6 7 8 reset diff --git a/Example/Element/PatchQuad.supan b/Example/Element/PatchQuad.supan index 95326ed40..078353ced 100644 --- a/Example/Element/PatchQuad.supan +++ b/Example/Element/PatchQuad.supan @@ -21,18 +21,18 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 3: -# 0.7500 1.0000 1.0000 +# 0.7500 1.0000 1.0000 # Displacement: -# -0.0019 0.0033 +# -0.0019 0.0033 # Resistance: -# 1.3922e-09 1.0000e+00 +# 1.3922e-09 1.0000e+00 # # Node 4: -# 0 1.0000 1.0000 +# 0 1.0000 1.0000 # Displacement: -# -0.0015 0.0014 +# -0.0015 0.0014 # Resistance: -# -4.3968e-09 1.0000e+00 +# -4.3968e-09 1.0000e+00 peek node 3 4 peek element 1 diff --git a/Example/Element/SingleSection.supan b/Example/Element/SingleSection.supan index f3e939c8c..a493722d1 100644 --- a/Example/Element/SingleSection.supan +++ b/Example/Element/SingleSection.supan @@ -23,11 +23,11 @@ analyze peek element 1 # Node 1: -# 0 0 +# 0 0 # Displacement: -# 4.9538e-16 3.0060e+00 +# 4.9538e-16 3.0060e+00 # Resistance: -# 4.4409e-16 3.0000e+00 +# 4.4409e-16 3.0000e+00 peek node 1 reset diff --git a/Example/Element/Spring01.supan b/Example/Element/Spring01.supan index da843e328..1f56c1e0b 100644 --- a/Example/Element/Spring01.supan +++ b/Example/Element/Spring01.supan @@ -24,11 +24,11 @@ converger RelIncreDisp 1 1E-8 10 1 analyze # Node 3: -# 6.0000 0 +# 6.0000 0 # Displacement: -# 2.0000 0 +# 2.0000 0 # Resistance: -# 1.0000e+02 0 +# 1.0000e+02 0 peek node 3 peek element 1 diff --git a/Example/Element/Spring02.supan b/Example/Element/Spring02.supan index 9d1ac93ad..b2fa870f3 100644 --- a/Example/Element/Spring02.supan +++ b/Example/Element/Spring02.supan @@ -25,11 +25,11 @@ converger RelIncreDisp 1 1E-8 10 1 analyze # Node 3: -# 3.0000 0 +# 3.0000 0 # Displacement: -# 0.5000 0 +# 0.5000 0 # Resistance: -# 10.0000 0 +# 10.0000 0 peek node 3 peek element 1 diff --git a/Example/Element/T3D2S.supan b/Example/Element/T3D2S.supan index 3ec7724d4..e1630549e 100644 --- a/Example/Element/T3D2S.supan +++ b/Example/Element/T3D2S.supan @@ -32,7 +32,7 @@ converger RelIncreDisp 1 1E-10 10 1 analyze -# 0.0020 0.0075 0.0020 +# 0.0020 0.0075 0.0020 peek node 4 peek element 1 diff --git a/Example/Material/ArmstrongFrederick.supan b/Example/Material/ArmstrongFrederick.supan index d61a65665..b1b4c10b7 100644 --- a/Example/Material/ArmstrongFrederick.supan +++ b/Example/Material/ArmstrongFrederick.supan @@ -37,32 +37,32 @@ converger RelIncreDisp 1 1E-12 10 1 analyze # Node 9: -# 5.0000 -5.0000 20.0000 +# 5.0000 -5.0000 20.0000 # Displacement: -# 0 0 0.5000 +# 0 0 0.5000 # Resistance: -# -3.6819e-13 4.4548e-12 1.2885e+04 +# -3.6819e-13 4.4548e-12 1.2885e+04 # # Node 10: -# 5.0000 5.0000 20.0000 +# 5.0000 5.0000 20.0000 # Displacement: -# 0 -0.1173 0.5000 +# 0 -0.1173 0.5000 # Resistance: -# 1.4952e-12 5.6687e-13 1.2885e+04 +# 1.4952e-12 5.6687e-13 1.2885e+04 # # Node 11: -# -5.0000 5.0000 20.0000 +# -5.0000 5.0000 20.0000 # Displacement: -# 0.1173 -0.1173 0.5000 +# 0.1173 -0.1173 0.5000 # Resistance: -# 7.3579e-13 1.9320e-12 1.2885e+04 +# 7.3579e-13 1.9320e-12 1.2885e+04 # # Node 12: -# -5.0000 -5.0000 20.0000 +# -5.0000 -5.0000 20.0000 # Displacement: -# 0.1173 0 0.5000 +# 0.1173 0 0.5000 # Resistance: -# -1.4472e-12 -2.8195e-13 1.2885e+04 +# -1.4472e-12 -2.8195e-13 1.2885e+04 peek node 9 10 11 12 reset diff --git a/Example/Material/ArmstrongFrederick1D.supan b/Example/Material/ArmstrongFrederick1D.supan index b80676eb5..cb1472d07 100644 --- a/Example/Material/ArmstrongFrederick1D.supan +++ b/Example/Material/ArmstrongFrederick1D.supan @@ -26,11 +26,11 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# -0.0109 1.0000 +# -0.0109 1.0000 # Resistance: -# 4.2649e-05 3.1987e+03 +# 4.2649e-05 3.1987e+03 peek node 2 reset diff --git a/Example/Material/Bilinear1D.supan b/Example/Material/Bilinear1D.supan index b0c8956f2..79f3a2fb4 100644 --- a/Example/Material/Bilinear1D.supan +++ b/Example/Material/Bilinear1D.supan @@ -21,11 +21,11 @@ converger RelIncreDisp 1 1E-4 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# -1.8667 7.7667 +# -1.8667 7.7667 # Resistance: -# 1.3333e-06 1.0000e+02 +# 1.3333e-06 1.0000e+02 peek node 2 peek element 1 diff --git a/Example/Material/Bilinear2D.supan b/Example/Material/Bilinear2D.supan index ce1f83ba0..cddb61062 100644 --- a/Example/Material/Bilinear2D.supan +++ b/Example/Material/Bilinear2D.supan @@ -175,11 +175,11 @@ converger RelIncreDisp 1 1E-10 50 1 analyze # Node 83: -# 4.0000 0.5000 +# 4.0000 0.5000 # Displacement: -# 4.4542e-19 2.0000e-02 1.0484e-02 +# 4.4542e-19 2.0000e-02 1.0484e-02 # Resistance: -# -3.8858e-16 1.0780e-01 2.8459e-17 +# -3.8858e-16 1.0780e-01 2.8459e-17 peek node 83 peek element 1 diff --git a/Example/Material/BilinearCC.supan b/Example/Material/BilinearCC.supan index 29ec96bc5..42ce6df79 100644 --- a/Example/Material/BilinearCC.supan +++ b/Example/Material/BilinearCC.supan @@ -31,11 +31,11 @@ converger RelIncreDisp 1 1E-13 50 1 analyze # Node 6: -# 5.0000 5.0000 5.0000 +# 5.0000 5.0000 5.0000 # Displacement: -# 0 0.0014 -0.0020 +# 0 0.0014 -0.0020 # Resistance: -# 1.6604e-13 2.0946e-13 -1.8623e+03 +# 1.6604e-13 2.0946e-13 -1.8623e+03 peek node 6 peek element 1 diff --git a/Example/Material/BilinearElastic1D.supan b/Example/Material/BilinearElastic1D.supan index 6c3c1806b..585647524 100644 --- a/Example/Material/BilinearElastic1D.supan +++ b/Example/Material/BilinearElastic1D.supan @@ -23,11 +23,11 @@ converger RelIncreDisp 1 1E-4 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# -0.5333 5.9889 +# -0.5333 5.9889 # Resistance: -# 1.3333e-06 1.0000e+02 +# 1.3333e-06 1.0000e+02 peek node 2 peek element 1 2 diff --git a/Example/Material/BilinearHoffman.supan b/Example/Material/BilinearHoffman.supan index 5324109c4..021596422 100644 --- a/Example/Material/BilinearHoffman.supan +++ b/Example/Material/BilinearHoffman.supan @@ -33,11 +33,11 @@ analyze peek element 1 # Node 5: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 -0.0974 +# 0 0 -0.0974 # Resistance: -# 2.6051e+03 -1.2345e+04 3.8586e-12 +# 2.6051e+03 -1.2345e+04 3.8586e-12 peek node 5 # save recorder 1 diff --git a/Example/Material/BilinearMises1D.supan b/Example/Material/BilinearMises1D.supan index d0606c7fb..3d8d95ec3 100644 --- a/Example/Material/BilinearMises1D.supan +++ b/Example/Material/BilinearMises1D.supan @@ -23,11 +23,11 @@ converger AbsIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# 1.0000 0 +# 1.0000 0 # Resistance: -# 30.0000 0 +# 30.0000 0 peek node 2 reset diff --git a/Example/Material/BilinearPeric.supan b/Example/Material/BilinearPeric.supan index 64c7c460a..4a42e385e 100644 --- a/Example/Material/BilinearPeric.supan +++ b/Example/Material/BilinearPeric.supan @@ -46,32 +46,32 @@ analyze peek element 1 # Node 5: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 -0.5000 +# 0 0 -0.5000 # Resistance: -# -5.0820e-04 5.0820e-04 -1.4742e+04 +# -5.0820e-04 5.0820e-04 -1.4742e+04 # # Node 6: -# 5.0000 5.0000 5.0000 +# 5.0000 5.0000 5.0000 # Displacement: -# 0 0.2441 -0.5000 +# 0 0.2441 -0.5000 # Resistance: -# -5.0820e-04 -5.0820e-04 -1.4742e+04 +# -5.0820e-04 -5.0820e-04 -1.4742e+04 # # Node 7: -# -5.0000 5.0000 5.0000 +# -5.0000 5.0000 5.0000 # Displacement: -# -0.2441 0.2441 -0.5000 +# -0.2441 0.2441 -0.5000 # Resistance: -# 5.0820e-04 -5.0820e-04 -1.4742e+04 +# 5.0820e-04 -5.0820e-04 -1.4742e+04 # # Node 8: -# -5.0000 -5.0000 5.0000 +# -5.0000 -5.0000 5.0000 # Displacement: -# -0.2441 0 -0.5000 +# -0.2441 0 -0.5000 # Resistance: -# 5.0820e-04 5.0820e-04 -1.4742e+04 +# 5.0820e-04 5.0820e-04 -1.4742e+04 peek node 5 6 7 8 reset diff --git a/Example/Material/BilinearViscosity.supan b/Example/Material/BilinearViscosity.supan index 41b342caa..e01bdbbf8 100644 --- a/Example/Material/BilinearViscosity.supan +++ b/Example/Material/BilinearViscosity.supan @@ -36,15 +36,15 @@ converger RelIncreDisp 1 1E-10 4 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 18.2589 0 +# 18.2589 0 # Resistance: -# 2.3505e+03 0 +# 2.3505e+03 0 # Velocity: -# 9.0747 0 +# 9.0747 0 # Acceleration: -# -19.4431 0 +# -19.4431 0 peek node 2 exit \ No newline at end of file diff --git a/Example/Material/BlatzKo.supan b/Example/Material/BlatzKo.supan index e1e905de6..b81e1c872 100644 --- a/Example/Material/BlatzKo.supan +++ b/Example/Material/BlatzKo.supan @@ -36,11 +36,11 @@ peek element 1 # save recorder 1 2 # Node 5: -# 0.5000 -0.5000 0.5000 +# 0.5000 -0.5000 0.5000 # Displacement: -# 0 0 4.0000 +# 0 0 4.0000 # Resistance: -# 5.1327e-02 -5.1327e-02 5.0989e+06 +# 5.1327e-02 -5.1327e-02 5.0989e+06 peek node 5 6 7 8 reset diff --git a/Example/Material/CDP.supan b/Example/Material/CDP.supan index 528e7e0ba..0f4450413 100644 --- a/Example/Material/CDP.supan +++ b/Example/Material/CDP.supan @@ -55,32 +55,36 @@ converger RelIncreDisp 1 1E-12 10 1 analyze # Node 9: -# 5.0000 -5.0000 20.0000 +# Coordinate: +# 5.0000 -5.0000 20.0000 # Displacement: -# 0 0 -0.0500 +# 0 0 -0.0500 # Resistance: -# -8.9425e-14 7.8143e-14 -9.1689e+02 +# 1.8961e-14 -2.3377e-14 -9.1460e+02 # # Node 10: -# 5.0000 5.0000 20.0000 +# Coordinate: +# 5.0000 5.0000 20.0000 # Displacement: -# 0 0.0135 -0.0500 +# 0 0.0135 -0.0500 # Resistance: -# 8.9725e-14 2.1507e-13 -9.1689e+02 +# 1.7761e-14 3.5470e-15 -9.1460e+02 # # Node 11: -# -5.0000 5.0000 20.0000 +# Coordinate: +# -5.0000 5.0000 20.0000 # Displacement: -# -0.0135 0.0135 -0.0500 +# -0.0135 0.0135 -0.0500 # Resistance: -# 1.5641e-14 -9.8162e-15 -9.1689e+02 +# -1.3575e-13 4.6065e-14 -9.1460e+02 # # Node 12: -# -5.0000 -5.0000 20.0000 +# Coordinate: +# -5.0000 -5.0000 20.0000 # Displacement: -# -0.0135 0 -0.0500 +# -0.0135 0 -0.0500 # Resistance: -# -4.4123e-14 4.3377e-14 -9.1689e+02 +# -8.0123e-14 -2.1897e-15 -9.1460e+02 peek node 9 10 11 12 peek element 1 diff --git a/Example/Material/CDPM2.supan b/Example/Material/CDPM2.supan index 5740e3338..15b93ded7 100644 --- a/Example/Material/CDPM2.supan +++ b/Example/Material/CDPM2.supan @@ -42,7 +42,38 @@ analyze peek element 1 -# peek node 9 10 11 12 +# Node 9: +# Coordinate: +# 5.0000 -5.0000 20.0000 +# Displacement: +# 0 0 -0.1000 +# Resistance: +# -1.1333e-13 1.5915e-13 -4.0729e+00 +# +# Node 10: +# Coordinate: +# 5.0000 5.0000 20.0000 +# Displacement: +# 0 0.0370 -0.1000 +# Resistance: +# -1.0423e-13 8.8103e-16 -4.0729e+00 +# +# Node 11: +# Coordinate: +# -5.0000 5.0000 20.0000 +# Displacement: +# -0.0370 0.0370 -0.1000 +# Resistance: +# 4.5427e-16 7.5096e-16 -4.0729e+00 +# +# Node 12: +# Coordinate: +# -5.0000 -5.0000 20.0000 +# Displacement: +# -0.0370 0 -0.1000 +# Resistance: +# -5.9986e-16 1.4301e-13 -4.0729e+00 +peek node 9 10 11 12 # save recorder 1 diff --git a/Example/Material/CDPM2PS.supan b/Example/Material/CDPM2PS.supan index 5ac89c922..6ac6dbb75 100644 --- a/Example/Material/CDPM2PS.supan +++ b/Example/Material/CDPM2PS.supan @@ -29,18 +29,18 @@ analyze # save recorder 1 # Node 3: -# 1.0000 1.0000 +# 1.0000 1.0000 # Displacement: -# -0.0008 0.0009 +# -0.0008 0.0009 # Resistance: -# -2.4842e-01 -1.3184e-16 +# -2.4842e-01 -1.3184e-16 # # Node 4: -# 0 1.0000 +# 0 1.0000 # Displacement: -# -0.0008 -0.0002 +# -0.0008 -0.0002 # Resistance: -# -5.2314e-01 -6.9389e-18 +# -5.2314e-01 -6.9389e-18 peek node 3 4 reset diff --git a/Example/Material/CDPPS.supan b/Example/Material/CDPPS.supan index c1eaf16b6..0ef77e706 100644 --- a/Example/Material/CDPPS.supan +++ b/Example/Material/CDPPS.supan @@ -45,18 +45,18 @@ analyze peek element 1 # Node 3: -# 1.0000 1.0000 +# 1.0000 1.0000 # Displacement: -# -0.0030 -0.0300 +# -0.0030 -0.0300 # Resistance: -# -0.1119 -0.1542 +# -0.1119 -0.1542 # # Node 4: -# 0 1.0000 +# 0 1.0000 # Displacement: -# -0.0030 -0.0300 +# -0.0030 -0.0300 # Resistance: -# 0.0693 -0.2005 +# 0.0693 -0.2005 peek node 3 4 reset diff --git a/Example/Material/Concrete01.supan b/Example/Material/Concrete01.supan index de059afdc..0dd1404e5 100644 --- a/Example/Material/Concrete01.supan +++ b/Example/Material/Concrete01.supan @@ -29,11 +29,11 @@ converger RelIncreDisp 1 1E-8 10 1 analyze # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# -0.0100 0 +# -0.0100 0 # Resistance: -# -1.7449e+03 0 +# -1.7449e+03 0 peek node 3 # save recorder 1 2 diff --git a/Example/Material/ConcreteCM2.supan b/Example/Material/ConcreteCM2.supan index 91a1ae31b..b25206a87 100644 --- a/Example/Material/ConcreteCM2.supan +++ b/Example/Material/ConcreteCM2.supan @@ -31,11 +31,11 @@ analyze # save recorder 1 2 # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# -0.0100 0 +# -0.0100 0 # Resistance: -# -1.6528e+03 0 +# -1.6528e+03 0 peek node 3 reset diff --git a/Example/Material/DP.supan b/Example/Material/DP.supan index 81cd09016..f710b0b34 100644 --- a/Example/Material/DP.supan +++ b/Example/Material/DP.supan @@ -35,11 +35,11 @@ analyze # Node 5: # Coordinate: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 -0.2000 +# 0 0 -0.2000 # Resistance: -# -3.9787e-13 6.2223e-13 -4.8162e+02 +# -3.9787e-13 6.2223e-13 -4.8162e+02 peek node 5 # save recorder 1 diff --git a/Example/Material/ExpCC.supan b/Example/Material/ExpCC.supan index d996a0fa7..5dea1cae1 100644 --- a/Example/Material/ExpCC.supan +++ b/Example/Material/ExpCC.supan @@ -31,11 +31,11 @@ converger RelIncreDisp 1 1E-12 50 1 analyze # Node 6: -# 5.0000 5.0000 5.0000 +# 5.0000 5.0000 5.0000 # Displacement: -# 0 0.0262 -0.0200 +# 0 0.0262 -0.0200 # Resistance: -# -6.8585e-12 -6.4791e-12 -1.5084e+04 +# -6.8585e-12 -6.4791e-12 -1.5084e+04 peek node 6 peek element 1 diff --git a/Example/Material/ExpDP.supan b/Example/Material/ExpDP.supan index 8ac2d11b2..8c169e0c5 100644 --- a/Example/Material/ExpDP.supan +++ b/Example/Material/ExpDP.supan @@ -45,11 +45,11 @@ converger RelIncreDisp 1 1E-10 50 1 analyze # Node 5: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 -0.1000 +# 0 0 -0.1000 # Resistance: -# -1.2580e-08 1.2580e-08 -1.0071e+00 +# -1.2580e-08 1.2580e-08 -1.0071e+00 peek node 5 6 7 8 peek element 1 diff --git a/Example/Material/ExpGurson.supan b/Example/Material/ExpGurson.supan index 9a47fc137..2c7f170a9 100644 --- a/Example/Material/ExpGurson.supan +++ b/Example/Material/ExpGurson.supan @@ -37,11 +37,11 @@ converger RelIncreDisp 1 1E-12 10 1 analyze # Node 9: -# 5.0000 -5.0000 20.0000 +# 5.0000 -5.0000 20.0000 # Displacement: -# 0 0 0.5000 +# 0 0 0.5000 # Resistance: -# -8.0815e-15 1.1129e-14 1.6991e+01 +# -8.0815e-15 1.1129e-14 1.6991e+01 peek node 9 reset diff --git a/Example/Material/ExpGurson1D.supan b/Example/Material/ExpGurson1D.supan index 39f05e85c..6b8df380d 100644 --- a/Example/Material/ExpGurson1D.supan +++ b/Example/Material/ExpGurson1D.supan @@ -34,11 +34,11 @@ save recorder 1 2 peek element 1 # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0.0500 0 +# 0.0500 0 # Resistance: -# 63.4232 0 +# 63.4232 0 peek node 3 save recorder 1 diff --git a/Example/Material/ExpHoffman.supan b/Example/Material/ExpHoffman.supan index 3ac233a4f..ffd073234 100644 --- a/Example/Material/ExpHoffman.supan +++ b/Example/Material/ExpHoffman.supan @@ -31,11 +31,11 @@ analyze peek element 1 # Node 5: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# -0.0264 0.2000 0.0621 +# -0.0264 0.2000 0.0621 # Resistance: -# 2.7689e-12 5.6473e+03 -4.9205e-13 +# 2.7689e-12 5.6473e+03 -4.9205e-13 peek node 5 # save recorder 1 diff --git a/Example/Material/ExpJ2.supan b/Example/Material/ExpJ2.supan index c3c9db22f..079d7a48d 100644 --- a/Example/Material/ExpJ2.supan +++ b/Example/Material/ExpJ2.supan @@ -32,11 +32,11 @@ converger RelIncreDisp 1 1E-8 50 1 analyze # Node 5: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 0.5000 +# 0 0 0.5000 # Resistance: -# -5.9329e-15 4.9515e-15 1.0131e+00 +# -5.9329e-15 4.9515e-15 1.0131e+00 peek node 5 6 7 8 peek element 1 diff --git a/Example/Material/ExpMises1D.supan b/Example/Material/ExpMises1D.supan index c26562b98..fb49f17c5 100644 --- a/Example/Material/ExpMises1D.supan +++ b/Example/Material/ExpMises1D.supan @@ -25,11 +25,11 @@ analyze peek element 1 # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# 1.0000 0 +# 1.0000 0 # Resistance: -# 1.2255e+02 0 +# 1.2255e+02 0 peek node 2 reset diff --git a/Example/Material/Kelvin.supan b/Example/Material/Kelvin.supan index 471dee278..c6edb5e31 100644 --- a/Example/Material/Kelvin.supan +++ b/Example/Material/Kelvin.supan @@ -40,15 +40,15 @@ converger RelIncreDisp 1 1E-11 10 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# -0.0002 0 +# -0.0002 0 # Resistance: -# -0.0690 0 +# -0.0690 0 # Velocity: -# 6.5088e-05 0 +# 6.5088e-05 0 # Acceleration: -# 0.0066 0 +# 0.0066 0 peek node 2 reset diff --git a/Example/Material/Laminated.supan b/Example/Material/Laminated.supan index 37d45abb9..5db5d1da1 100644 --- a/Example/Material/Laminated.supan +++ b/Example/Material/Laminated.supan @@ -30,18 +30,18 @@ analyze peek element 1 # Node 3: -# 1.0000 1.0000 +# 1.0000 1.0000 # Displacement: -# 0 -0.0262 +# 0 -0.0262 # Resistance: -# -50.0553 -31.1029 +# -50.0553 -31.1029 # # Node 4: -# 0 1.0000 +# 0 1.0000 # Displacement: -# 0.0500 0.0200 +# 0.0500 0.0200 # Resistance: -# 40.8311 10.0720 +# 40.8311 10.0720 peek node 3 4 reset diff --git a/Example/Material/LinearDamage.supan b/Example/Material/LinearDamage.supan index 0b765338b..621cef85a 100644 --- a/Example/Material/LinearDamage.supan +++ b/Example/Material/LinearDamage.supan @@ -105,7 +105,7 @@ converger RelIncreDisp 1 1E-8 40 1 analyze -# -0.0705 -0.0497 1.0000 +# -0.0705 -0.0497 1.0000 peek node 12 peek element 1 diff --git a/Example/Material/LinearHoffman.supan b/Example/Material/LinearHoffman.supan index 7b4a78110..d1713b6e4 100644 --- a/Example/Material/LinearHoffman.supan +++ b/Example/Material/LinearHoffman.supan @@ -32,11 +32,11 @@ converger RelIncreDisp 1 1E-14 50 1 analyze # Node 5: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 0.0500 +# 0 0 0.0500 # Resistance: -# 1.4398e-04 -1.4398e-04 6.9317e+03 +# 1.4398e-04 -1.4398e-04 6.9317e+03 peek node 5 reset diff --git a/Example/Material/MPF.supan b/Example/Material/MPF.supan index 9eea228d3..ee08e08bc 100644 --- a/Example/Material/MPF.supan +++ b/Example/Material/MPF.supan @@ -22,11 +22,11 @@ converger RelIncreDisp 1 1E-8 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# -2.4667 10.6500 +# -2.4667 10.6500 # Resistance: -# 1.0667e-06 8.0000e+01 +# 1.0667e-06 8.0000e+01 peek node 2 peek element 1 2 diff --git a/Example/Material/Maxwell.supan b/Example/Material/Maxwell.supan index 3b79da60d..1e1100577 100644 --- a/Example/Material/Maxwell.supan +++ b/Example/Material/Maxwell.supan @@ -38,15 +38,15 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# -0.0001 0 +# -0.0001 0 # Resistance: -# -0.0093 0 +# -0.0093 0 # Velocity: -# 0.0124 0 +# 0.0124 0 # Acceleration: -# 0.0009 0 +# 0.0009 0 peek node 2 # save recorder 1 2 3 4 5 6 7 diff --git a/Example/Material/MooneyRivlin.supan b/Example/Material/MooneyRivlin.supan index e823cf008..6d68c143a 100644 --- a/Example/Material/MooneyRivlin.supan +++ b/Example/Material/MooneyRivlin.supan @@ -32,11 +32,11 @@ analyze peek element 1 # Node 3: -# 1.0000 1.0000 0 +# 1.0000 1.0000 0 # Displacement: -# -1.0000 0.7514 0 +# -1.0000 0.7514 0 # Resistance: -# -3.6345e+02 -3.9355e-10 2.3739e+02 +# -3.6345e+02 -3.9355e-10 2.3739e+02 peek node 3 reset diff --git a/Example/Material/MultilinearJ2.supan b/Example/Material/MultilinearJ2.supan index f9f405caa..6aded4483 100644 --- a/Example/Material/MultilinearJ2.supan +++ b/Example/Material/MultilinearJ2.supan @@ -31,7 +31,7 @@ converger RelIncreDisp 1 1E-8 50 1 analyze # Resistance: -# 4.1942e-05 -4.1942e-05 5.2083e+03 +# 4.1942e-05 -4.1942e-05 5.2083e+03 peek node 5 6 7 8 peek element 1 diff --git a/Example/Material/MultilinearMises1D.supan b/Example/Material/MultilinearMises1D.supan index 9fcaf0403..8a8c1287d 100644 --- a/Example/Material/MultilinearMises1D.supan +++ b/Example/Material/MultilinearMises1D.supan @@ -24,11 +24,11 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# 0.1000 0 +# 0.1000 0 # Resistance: -# 2.5980e+03 0 +# 2.5980e+03 0 peek node 2 peek element 1 diff --git a/Example/Material/NLE1D01.supan b/Example/Material/NLE1D01.supan index 1763b4584..32cda87fd 100644 --- a/Example/Material/NLE1D01.supan +++ b/Example/Material/NLE1D01.supan @@ -21,11 +21,11 @@ converger RelIncreDisp 1 1E-4 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# -0.2133 1.3956 +# -0.2133 1.3956 # Resistance: -# -2.1579e-11 4.0000e+01 +# -2.1579e-11 4.0000e+01 peek node 2 peek element 1 2 diff --git a/Example/Material/OrthotropicElastic3D.supan b/Example/Material/OrthotropicElastic3D.supan index 7d3442633..d1a6978b5 100644 --- a/Example/Material/OrthotropicElastic3D.supan +++ b/Example/Material/OrthotropicElastic3D.supan @@ -35,7 +35,7 @@ analyze peek element 1 # Resistance: -# -2.0264e+01 -5.1203e-01 -1.0316e+02 +# -2.0264e+01 -5.1203e-01 -1.0316e+02 peek node 5 # save recorder 1 diff --git a/Example/Material/ParabolicCC.supan b/Example/Material/ParabolicCC.supan index b6d1c1064..48ff050dd 100644 --- a/Example/Material/ParabolicCC.supan +++ b/Example/Material/ParabolicCC.supan @@ -31,7 +31,7 @@ converger AbsIncreDisp 1 1E-13 10 1 analyze # Resistance: -# -5.8435e+03 5.8435e+03 -7.7404e+03 +# -5.8435e+03 5.8435e+03 -7.7404e+03 peek node 5 6 7 8 peek element 1 diff --git a/Example/Material/Parallel.supan b/Example/Material/Parallel.supan index 878044e89..407e0cc56 100644 --- a/Example/Material/Parallel.supan +++ b/Example/Material/Parallel.supan @@ -34,11 +34,11 @@ analyze peek element 1 # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0.5000 0 +# 0.5000 0 # Resistance: -# 4.3018e+03 0 +# 4.3018e+03 0 peek node 3 reset diff --git a/Example/Material/PolyJ2.supan b/Example/Material/PolyJ2.supan index 12f742c49..5e28c8946 100644 --- a/Example/Material/PolyJ2.supan +++ b/Example/Material/PolyJ2.supan @@ -31,32 +31,32 @@ converger RelIncreDisp 1 1E-12 50 1 analyze # Node 5: -# 5.0000 -5.0000 5.0000 +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 0.5000 +# 0 0 0.5000 # Resistance: -# 2.9714e-06 -2.9714e-06 6.3773e+02 +# 2.9714e-06 -2.9714e-06 6.3773e+02 # # Node 6: -# 5.0000 5.0000 5.0000 +# 5.0000 5.0000 5.0000 # Displacement: -# 0 -0.1543 0.5000 +# 0 -0.1543 0.5000 # Resistance: -# 2.9714e-06 2.9714e-06 6.3773e+02 +# 2.9714e-06 2.9714e-06 6.3773e+02 # # Node 7: -# -5.0000 5.0000 5.0000 +# -5.0000 5.0000 5.0000 # Displacement: -# 0.1543 -0.1543 0.5000 +# 0.1543 -0.1543 0.5000 # Resistance: -# -2.9714e-06 2.9714e-06 6.3773e+02 +# -2.9714e-06 2.9714e-06 6.3773e+02 # # Node 8: -# -5.0000 -5.0000 5.0000 +# -5.0000 -5.0000 5.0000 # Displacement: -# 0.1543 0 0.5000 +# 0.1543 0 0.5000 # Resistance: -# -2.9714e-06 -2.9714e-06 6.3773e+02 +# -2.9714e-06 -2.9714e-06 6.3773e+02 peek node 5 6 7 8 peek element 1 diff --git a/Example/Material/RO.supan b/Example/Material/RO.supan index 218000436..4835369d8 100644 --- a/Example/Material/RO.supan +++ b/Example/Material/RO.supan @@ -25,11 +25,11 @@ converger RelIncreDisp 1 1E-12 10 1 analyze # Node 3: -# 6.0000 0 +# 6.0000 0 # Displacement: -# 5.0703 0 +# 5.0703 0 # Resistance: -# 1.0000e+02 0 +# 1.0000e+02 0 peek node 3 peek element 1 diff --git a/Example/Material/Sequential.supan b/Example/Material/Sequential.supan index c8efcc18a..81d6f171d 100644 --- a/Example/Material/Sequential.supan +++ b/Example/Material/Sequential.supan @@ -29,11 +29,11 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0.0050 0 +# 0.0050 0 # Resistance: -# 16.6666 0 +# 16.6666 0 peek node 3 peek element 1 diff --git a/Example/Material/SimpleSand.supan b/Example/Material/SimpleSand.supan index bb6ee81c9..cef007678 100644 --- a/Example/Material/SimpleSand.supan +++ b/Example/Material/SimpleSand.supan @@ -37,11 +37,11 @@ converger RelIncreDisp 1 1E-12 10 1 analyze # Node 10: -# 5.0000 5.0000 20.0000 +# 5.0000 5.0000 20.0000 # Displacement: -# 0 0 -0.1000 +# 0 0 -0.1000 # Resistance: -# -1.2126e+03 -1.2126e+03 -1.2633e+03 +# -1.2126e+03 -1.2126e+03 -1.2633e+03 peek node 10 peek element 1 diff --git a/Example/Material/Sinh.supan b/Example/Material/Sinh.supan new file mode 100644 index 000000000..2b3b22f7c --- /dev/null +++ b/Example/Material/Sinh.supan @@ -0,0 +1,66 @@ +node 1 0 0 +node 2 1 0 +node 3 2 0 +node 4 3 0 + +material Tanh1D 1 1000 +material Elastic1D 2 100 +material Sinh1D 3 1000 +material Viscosity01 4 1 5 + +element Spring01 1 1 2 1 +element Spring01 2 2 3 2 +element Spring01 3 3 4 3 +element Damper01 4 2 3 4 + +mass 5 2 1 1 +mass 6 3 1 1 + +fix2 1 1 1 4 +fix2 2 2 1 2 3 4 + +initial velocity 100 1 2 + +hdf5recorder 1 Node U1 2 3 +hdf5recorder 2 Node V1 2 3 + +step dynamic 1 1 +set ini_step_size 1E-2 +set fixed_step_size 1 + +converger RelIncreDisp 1 1E-11 10 1 + +analyze + +# Node 2: +# Coordinate: +# 1.0000 0 +# Displacement: +# 0.3815 0 +# Resistance: +# 4.2847e+02 0 +# Velocity: +# 26.5382 0 +# Acceleration: +# -4.2847e+02 0 +# +# Node 3: +# Coordinate: +# 2.0000 0 +# Displacement: +# 0.4516 0 +# Resistance: +# 4.0270e+02 0 +# Velocity: +# 12.2488 0 +# Acceleration: +# -4.0270e+02 0 +peek node 2 3 + +peek element 1 3 + +# save recorder 1 2 + +reset +clear +exit diff --git a/Example/Material/SlipLock.supan b/Example/Material/SlipLock.supan index 6fe9bbc97..9545b3341 100644 --- a/Example/Material/SlipLock.supan +++ b/Example/Material/SlipLock.supan @@ -31,11 +31,11 @@ analyze peek element 1 # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# -0.0020 0 +# -0.0020 0 # Resistance: -# -0.2618 0 +# -0.2618 0 peek node 3 reset diff --git a/Example/Material/SteelBRB.supan b/Example/Material/SteelBRB.supan index 8ed1a55cb..67ec1e0ad 100644 --- a/Example/Material/SteelBRB.supan +++ b/Example/Material/SteelBRB.supan @@ -26,11 +26,11 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# -0.2233 1.0000 +# -0.2233 1.0000 # Resistance: -# -3.5470e-11 3.8243e+03 +# -3.5470e-11 3.8243e+03 peek node 2 reset diff --git a/Example/Material/Substepping.supan b/Example/Material/Substepping.supan index 4adbf33c5..1621e2cee 100644 --- a/Example/Material/Substepping.supan +++ b/Example/Material/Substepping.supan @@ -32,11 +32,11 @@ converger RelIncreDisp 1 1E-12 50 1 analyze # Node 6: -# 5.0000 5.0000 5.0000 +# 5.0000 5.0000 5.0000 # Displacement: -# 0 0.0262 -0.0200 +# 0 0.0262 -0.0200 # Resistance: -# -6.8585e-12 -6.4791e-12 -1.5084e+04 +# -6.8585e-12 -6.4791e-12 -1.5084e+04 peek node 6 # save recorder 1 diff --git a/Example/Material/VAFCRP.supan b/Example/Material/VAFCRP.supan index 42d2c6ccb..4c1abbb3d 100644 --- a/Example/Material/VAFCRP.supan +++ b/Example/Material/VAFCRP.supan @@ -37,32 +37,32 @@ converger RelIncreDisp 1 1E-12 10 1 analyze # Node 9: -# 5.0000 -5.0000 20.0000 +# 5.0000 -5.0000 20.0000 # Displacement: -# 0 0 0.5000 +# 0 0 0.5000 # Resistance: -# 1.0651e-13 -4.8376e-13 1.3199e+04 +# 1.0651e-13 -4.8376e-13 1.3199e+04 # # Node 10: -# 5.0000 5.0000 20.0000 +# 5.0000 5.0000 20.0000 # Displacement: -# 0 -0.1171 0.5000 +# 0 -0.1171 0.5000 # Resistance: -# 4.1362e-12 -1.1330e-13 1.3199e+04 +# 4.1362e-12 -1.1330e-13 1.3199e+04 # # Node 11: -# -5.0000 5.0000 20.0000 +# -5.0000 5.0000 20.0000 # Displacement: -# 0.1171 -0.1171 0.5000 +# 0.1171 -0.1171 0.5000 # Resistance: -# -4.2658e-13 2.0193e-12 1.3199e+04 +# -4.2658e-13 2.0193e-12 1.3199e+04 # # Node 12: -# -5.0000 -5.0000 20.0000 +# -5.0000 -5.0000 20.0000 # Displacement: -# 0.1171 0 0.5000 +# 0.1171 0 0.5000 # Resistance: -# 2.8548e-12 -1.4391e-12 1.3199e+04 +# 2.8548e-12 -1.4391e-12 1.3199e+04 peek node 9 10 11 12 reset diff --git a/Example/Material/VAFCRP1D.supan b/Example/Material/VAFCRP1D.supan index dd4773b07..21d147d23 100644 --- a/Example/Material/VAFCRP1D.supan +++ b/Example/Material/VAFCRP1D.supan @@ -29,11 +29,11 @@ analyze # save recorder 1 2 # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# -0.1000 0 +# -0.1000 0 # Resistance: -# -49.0640 0 +# -49.0640 0 peek node 3 reset diff --git a/Example/Material/Yeoh.supan b/Example/Material/Yeoh.supan index 49f0f6886..590ddc1d6 100644 --- a/Example/Material/Yeoh.supan +++ b/Example/Material/Yeoh.supan @@ -32,11 +32,11 @@ analyze peek element 1 # Node 3: -# 1.0000 1.0000 0 +# 1.0000 1.0000 0 # Displacement: -# -1.0000 0.1701 0 +# -1.0000 0.1701 0 # Resistance: -# -1.0040e+03 1.2968e-12 1.0136e+03 +# -1.0040e+03 1.2968e-12 1.0136e+03 peek node 3 reset diff --git a/Example/Other/BodyForce.supan b/Example/Other/BodyForce.supan index 58231d820..0020fd8ca 100644 --- a/Example/Other/BodyForce.supan +++ b/Example/Other/BodyForce.supan @@ -23,11 +23,12 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# Coordinate: +# 1.0000 0 # Displacement: -# -4.0000e-05 0 +# -4.0000e-05 0 # Resistance: -# 3.5293e-18 -5.0000e-01 +# 1.8361e-17 -5.0000e-01 peek node 2 peek element 1 diff --git a/Example/Other/Collision2D.supan b/Example/Other/Collision2D.supan index 6050283e1..d3616bcaf 100644 --- a/Example/Other/Collision2D.supan +++ b/Example/Other/Collision2D.supan @@ -58,7 +58,7 @@ rigidwall 3 0 5 0 -1 1E0 rigidwall 4 -5 0 1 0 1E0 rigidwall 5 0 -5 0 1 1E0 -step dynamic 1 200 +step dynamic 1 2 set ini_step_size 2E-2 set fixed_step_size 1 set band_mat false @@ -68,6 +68,19 @@ converger RelIncreDisp 1 1E-11 10 1 analyze +# Node 2: +# Coordinate: +# 1.7580 2.2530 +# Displacement: +# 0.5282 -1.4996 +# Resistance: +# 0 0 +# Velocity: +# 0.2642 -0.7496 +# Acceleration: +# 0.0079 0.0086 +peek node 2 + # save recorder 1 exit diff --git a/Example/Other/Collision3D.supan b/Example/Other/Collision3D.supan index 03638459e..9fa32f23d 100644 --- a/Example/Other/Collision3D.supan +++ b/Example/Other/Collision3D.supan @@ -58,7 +58,7 @@ rigidwall 3 0 5 0 0 -1 0 1E0 rigidwall 4 -5 0 0 1 0 0 1E0 rigidwall 5 0 -5 0 0 1 0 1E0 -step dynamic 1 200 +step dynamic 1 2 set ini_step_size 2E-2 set fixed_step_size 1 set band_mat false @@ -68,6 +68,19 @@ converger RelIncreDisp 1 1E-11 10 1 analyze +# Node 2: +# Coordinate: +# 1.7580 2.2530 0 +# Displacement: +# 0.5282 -1.4996 0 +# Resistance: +# 0 0 0 +# Velocity: +# 0.2642 -0.7496 0 +# Acceleration: +# 0.0079 0.0086 0 +peek node 2 + # save recorder 1 exit diff --git a/Example/Other/Embed2D.supan b/Example/Other/Embed2D.supan index 83ca77b28..70524716c 100644 --- a/Example/Other/Embed2D.supan +++ b/Example/Other/Embed2D.supan @@ -31,11 +31,11 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 7: -# 0.5000 0.5000 +# 0.5000 0.5000 # Displacement: -# -1.4786e-05 1.9836e-04 +# -1.4786e-05 1.9836e-04 # Resistance: -# -0.0666 0.0195 +# -0.0666 0.0195 peek node 7 reset diff --git a/Example/Other/FixedLength.supan b/Example/Other/FixedLength.supan index 3f2ad960d..10d9a82dd 100644 --- a/Example/Other/FixedLength.supan +++ b/Example/Other/FixedLength.supan @@ -26,18 +26,18 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0 0.1667 0.2500 +# 0 0.1667 0.2500 # Resistance: -# 0 5.0000e+00 4.4409e-16 +# 0 5.0000e+00 4.4409e-16 # # Node 3: -# 1.0000 -1.0000 +# 1.0000 -1.0000 # Displacement: -# 0 0.1667 -0.2500 +# 0 0.1667 -0.2500 # Resistance: -# 0 5.0000e+00 -4.4409e-16 +# 0 5.0000e+00 -4.4409e-16 peek node 2 3 peek element 1 diff --git a/Example/Other/Group.supan b/Example/Other/Group.supan index 5964fd3a9..22037b7d7 100644 --- a/Example/Other/Group.supan +++ b/Example/Other/Group.supan @@ -174,11 +174,12 @@ step static 1 analyze # Node 117: -# 20.0000 5.0000 +# Coordinate: +# 20.0000 5.0000 # Displacement: -# -2.7281e-16 6.7310e+00 +# -9.6083e-15 6.7310e+00 # Resistance: -# 5.3291e-15 2.0000e+00 +# 6.8945e-14 2.0000e+00 peek node 117 peek element 1 diff --git a/Example/Other/GroupBodyForce.supan b/Example/Other/GroupBodyForce.supan index 79a2df3ff..e7aa00f11 100644 --- a/Example/Other/GroupBodyForce.supan +++ b/Example/Other/GroupBodyForce.supan @@ -25,11 +25,12 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# Coordinate: +# 1.0000 0 # Displacement: -# -4.0000e-05 0 +# -4.0000e-05 0 # Resistance: -# 3.5293e-18 -5.0000e-01 +# 1.8361e-17 -5.0000e-01 peek node 2 peek element 1 diff --git a/Example/Other/GroupDisplacement.supan b/Example/Other/GroupDisplacement.supan index 5bbba742d..020e912b2 100644 --- a/Example/Other/GroupDisplacement.supan +++ b/Example/Other/GroupDisplacement.supan @@ -169,11 +169,12 @@ step static 1 analyze # Node 117: -# 20.0000 5.0000 +# Coordinate: +# 20.0000 5.0000 # Displacement: -# -9.0824e-18 2.0000e+00 +# -2.1376e-15 2.0000e+00 # Resistance: -# -1.7375e-14 6.9464e-01 +# -7.8271e-15 6.9464e-01 peek node 117 peek element 1 diff --git a/Example/Other/GroupLoad.supan b/Example/Other/GroupLoad.supan index 983c7bfcb..50eca68a2 100644 --- a/Example/Other/GroupLoad.supan +++ b/Example/Other/GroupLoad.supan @@ -169,11 +169,12 @@ step static 1 analyze # Node 117: -# 20.0000 5.0000 +# Coordinate: +# 20.0000 5.0000 # Displacement: -# 4.7784e-16 6.7310e+00 +# -7.1150e-15 6.7310e+00 # Resistance: -# -3.0420e-14 2.0000e+00 +# 9.5257e-14 2.0000e+00 peek node 117 peek element 1 diff --git a/Example/Other/LJP2D.supan b/Example/Other/LJP2D.supan index 3fa4b8773..cd42695b3 100644 --- a/Example/Other/LJP2D.supan +++ b/Example/Other/LJP2D.supan @@ -58,7 +58,7 @@ rigidwall 3 0 5 0 -1 1E0 rigidwall 4 -5 0 1 0 1E0 rigidwall 5 0 -5 0 1 1E0 -step dynamic 1 200 +step dynamic 1 2 set ini_step_size 1E-2 set fixed_step_size 1 set band_mat false @@ -68,6 +68,19 @@ converger RelIncreDisp 1 1E-11 10 1 analyze +# Node 3: +# Coordinate: +# -2.4000 -0.8920 +# Displacement: +# 1.4826 1.3684 +# Resistance: +# 0 0 +# Velocity: +# 1.6300 1.7174 +# Acceleration: +# -0.8358 -1.9236 +peek node 3 + # save recorder 1 exit diff --git a/Example/Other/LinearParticle.supan b/Example/Other/LinearParticle.supan index 459cc34fc..fecd16f54 100644 --- a/Example/Other/LinearParticle.supan +++ b/Example/Other/LinearParticle.supan @@ -11,7 +11,7 @@ constraint linearspring2d 1 3. 10. modifier LinearViscosity 1 1E1 -step dynamic 1 200 +step dynamic 1 2 set ini_step_size 1E-2 set fixed_step_size 1 set band_mat false @@ -21,7 +21,20 @@ converger AbsIncreDisp 1 1E-11 10 1 analyze -save recorder 1 +# Node 2: +# Coordinate: +# 1.0000 0 +# Displacement: +# 0.1745 0 +# Resistance: +# 0 0 +# Velocity: +# 0.1578 0 +# Acceleration: +# 0.0493 0 +peek node 2 + +# save recorder 1 exit diff --git a/Example/Other/MPC.supan b/Example/Other/MPC.supan index baeb18954..6ed31961a 100644 --- a/Example/Other/MPC.supan +++ b/Example/Other/MPC.supan @@ -26,18 +26,18 @@ converger RelIncreDisp 1 1E-10 3 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0 0.1167 0.1750 +# 0 0.1167 0.1750 # Resistance: -# 0 3.5000e+00 -2.6645e-15 +# 0 3.5000e+00 -2.6645e-15 # # Node 3: -# 2.0000 0 +# 2.0000 0 # Displacement: -# 0 0.2167 -0.3250 +# 0 0.2167 -0.3250 # Resistance: -# 0 6.5000e+00 2.6645e-15 +# 0 6.5000e+00 2.6645e-15 peek node 2 3 peek element 1 diff --git a/Example/Other/MaxGap2D.supan b/Example/Other/MaxGap2D.supan index 558096aef..d2ca12eb0 100644 --- a/Example/Other/MaxGap2D.supan +++ b/Example/Other/MaxGap2D.supan @@ -26,18 +26,18 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0 0.2667 0.4000 +# 0 0.2667 0.4000 # Resistance: -# 0 8.0000e+00 1.7764e-15 +# 0 8.0000e+00 1.7764e-15 # # Node 3: -# 1.0000 -1.0000 +# 1.0000 -1.0000 # Displacement: -# 0 0.0667 -0.1000 +# 0 0.0667 -0.1000 # Resistance: -# 0 2.0000e+00 4.4409e-16 +# 0 2.0000e+00 4.4409e-16 peek node 2 3 peek element 1 diff --git a/Example/Other/MinGap2D.supan b/Example/Other/MinGap2D.supan index e855a9a79..9be344c03 100644 --- a/Example/Other/MinGap2D.supan +++ b/Example/Other/MinGap2D.supan @@ -26,18 +26,18 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0 -0.2667 -0.4000 +# 0 -0.2667 -0.4000 # Resistance: -# 0 -8.0000 0 +# 0 -8.0000 0 # # Node 3: -# 1.0000 -1.0000 +# 1.0000 -1.0000 # Displacement: -# 0 -0.0667 0.1000 +# 0 -0.0667 0.1000 # Resistance: -# 0 -2.0000 0 +# 0 -2.0000 0 peek node 2 3 peek element 1 diff --git a/Example/Other/NodeFacet.supan b/Example/Other/NodeFacet.supan index ebace430f..f8f54ed35 100644 --- a/Example/Other/NodeFacet.supan +++ b/Example/Other/NodeFacet.supan @@ -58,6 +58,37 @@ cload 1 0 -5. 3 11 analyze +# Node 10: +# Coordinate: +# 8.5000 2.0000 0.1000 +# Displacement: +# 0.0013 -0.1543 -0.2447 0 -0.0196 0.0201 +# Resistance: +# 1.3107e-02 -3.0430e-03 5.1732e-01 0 -3.1086e-15 -5.2042e-18 +# +# Node 2: +# Coordinate: +# 5.2000 0.5000 0 +# Displacement: +# -1.4838e-05 -7.4515e-07 -6.8944e-02 1.6011e-03 2.2136e-02 -3.1679e-06 +# Resistance: +# -4.0924e-03 9.5013e-04 -1.6152e-01 -8.3267e-17 -1.1102e-16 -6.5486e-17 +# +# Node 3: +# Coordinate: +# 10.0000 0 0 +# Displacement: +# -3.0265e-05 -1.3435e-05 -1.9350e-01 5.7062e-03 2.6700e-02 -1.0275e-05 +# Resistance: +# -4.5853e-03 1.0646e-03 -1.8098e-01 4.2071e-16 2.2128e-16 3.7080e-17 +# +# Node 6: +# Coordinate: +# 10.0000 5.0000 0 +# Displacement: +# -1.0927e-05 -7.0631e-06 -1.6409e-01 6.3367e-03 2.4003e-02 -3.8249e-06 +# Resistance: +# -4.4292e-03 1.0283e-03 -1.7481e-01 1.0478e-15 -3.0531e-16 -6.2992e-17 peek node 10 2 3 6 exit diff --git a/Example/Other/NodeLine.supan b/Example/Other/NodeLine.supan index e0cb72d44..e6bd345dc 100644 --- a/Example/Other/NodeLine.supan +++ b/Example/Other/NodeLine.supan @@ -29,11 +29,11 @@ converger RelIncreDisp 2 1E-11 10 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# -0.0008 -0.2000 -0.3000 +# -0.0008 -0.2000 -0.3000 # Resistance: -# -9.5380e-02 -6.0000e+00 -4.4409e-16 +# -9.5380e-02 -6.0000e+00 -4.4409e-16 peek node 2 5 6 peek group 1 2 diff --git a/Example/Other/RestitutionWall1D.supan b/Example/Other/RestitutionWall1D.supan index 439e1ddb5..5dbb4a827 100644 --- a/Example/Other/RestitutionWall1D.supan +++ b/Example/Other/RestitutionWall1D.supan @@ -14,7 +14,7 @@ initial acceleration -2 1 1 restitutionwall 1 0 1 1. 1E4 -step dynamic 1 8 +step dynamic 1 1.5 set ini_step_size 1E-2 set fixed_step_size 1 set symm_mat 0 @@ -24,8 +24,19 @@ converger RelIncreDisp 1 1E-10 5 1 analyze -save recorder 1 2 3 - +# save recorder 1 2 3 + +# Node 1: +# Coordinate: +# 1.0000 +# Displacement: +# -0.2409 +# Resistance: +# 0 +# Velocity: +# 0.9784 +# Acceleration: +# -2.0000 peek node 1 exit \ No newline at end of file diff --git a/Example/Other/RestitutionWall2D.supan b/Example/Other/RestitutionWall2D.supan index 37a488134..cbc6be37b 100644 --- a/Example/Other/RestitutionWall2D.supan +++ b/Example/Other/RestitutionWall2D.supan @@ -14,7 +14,7 @@ initial acceleration -2 2 1 restitutionwall 1 0 0 0 1 1. 1E4 -step dynamic 1 8 +step dynamic 1 1.5 set ini_step_size 1E-2 set fixed_step_size 1 set symm_mat 0 @@ -24,8 +24,19 @@ converger RelIncreDisp 1 1E-10 5 1 analyze -save recorder 1 2 3 - +# save recorder 1 2 3 + +# Node 1: +# Coordinate: +# 0 1.0000 +# Displacement: +# 0 -0.2409 +# Resistance: +# 0 0 +# Velocity: +# 0 0.9784 +# Acceleration: +# 0 -2.0000 peek node 1 exit \ No newline at end of file diff --git a/Example/Other/RestitutionWall3D.supan b/Example/Other/RestitutionWall3D.supan index bc7e500c2..cb4caea6c 100644 --- a/Example/Other/RestitutionWall3D.supan +++ b/Example/Other/RestitutionWall3D.supan @@ -14,7 +14,7 @@ initial acceleration -2 2 1 restitutionwall 1 0 0 0 0 1 0 1. 1E4 -step dynamic 1 8 +step dynamic 1 1.5 set ini_step_size 1E-2 set fixed_step_size 1 set symm_mat 0 @@ -24,8 +24,19 @@ converger RelIncreDisp 1 1E-10 5 1 analyze -save recorder 1 2 3 - +# save recorder 1 2 3 + +# Node 1: +# Coordinate: +# 0 1.0000 0 +# Displacement: +# 0 -0.2409 0 +# Resistance: +# 0 0 0 +# Velocity: +# 0 0.9784 0 +# Acceleration: +# 0 -2.0000 0 peek node 1 exit \ No newline at end of file diff --git a/Example/Other/RigidWall1D.supan b/Example/Other/RigidWall1D.supan index aa2571c28..f0f801d72 100644 --- a/Example/Other/RigidWall1D.supan +++ b/Example/Other/RigidWall1D.supan @@ -14,7 +14,7 @@ initial acceleration -1 1 1 constraint rigidwall 1 0 1 .1 -step dynamic 1 8 +step dynamic 1 1.5 set ini_step_size 1E-3 set fixed_step_size 1 set symm_mat 0 @@ -27,15 +27,16 @@ analyze # save recorder 1 2 3 # Node 1: -# 0 1.0000 +# Coordinate: +# 1.0000 # Displacement: -# 0 -0.1357 +# -0.8956 # Resistance: -# 0 0 +# 0 # Velocity: -# 0 0.5259 +# 1.3396 # Acceleration: -# 0 -1.0000 +# -1.0000 peek node 1 exit \ No newline at end of file diff --git a/Example/Other/RigidWall2D.supan b/Example/Other/RigidWall2D.supan index 65d3da1e1..060a046c6 100644 --- a/Example/Other/RigidWall2D.supan +++ b/Example/Other/RigidWall2D.supan @@ -14,7 +14,7 @@ initial acceleration -1 2 1 constraint rigidwall 1 0 0 0 1 .1 -step dynamic 1 8 +step dynamic 1 1.5 set ini_step_size 1E-3 set fixed_step_size 1 set symm_mat 0 @@ -27,15 +27,16 @@ analyze # save recorder 1 2 3 # Node 1: -# 0 1.0000 +# Coordinate: +# 0 1.0000 # Displacement: -# 0 -0.1357 +# 0 -0.8956 # Resistance: -# 0 0 +# 0 0 # Velocity: -# 0 0.5259 +# 0 1.3396 # Acceleration: -# 0 -1.0000 +# 0 -1.0000 peek node 1 exit \ No newline at end of file diff --git a/Example/Other/RigidWall3D.supan b/Example/Other/RigidWall3D.supan index c117eaca4..c876d175a 100644 --- a/Example/Other/RigidWall3D.supan +++ b/Example/Other/RigidWall3D.supan @@ -14,7 +14,7 @@ initial acceleration -1 2 1 constraint rigidwall 1 0 0 0 0 1 0 .1 -step dynamic 1 8 +step dynamic 1 1.5 set ini_step_size 1E-3 set fixed_step_size 1 set symm_mat 0 @@ -27,15 +27,16 @@ analyze # save recorder 1 2 3 # Node 1: -# 0 1.0000 +# Coordinate: +# 0 1.0000 0 # Displacement: -# 0 -0.1357 +# 0 -0.8956 0 # Resistance: -# 0 0 +# 0 0 0 # Velocity: -# 0 0.5259 +# 0 1.3396 0 # Acceleration: -# 0 -1.0000 +# 0 -1.0000 0 peek node 1 exit \ No newline at end of file diff --git a/Example/Other/RigidWallMultiplier.supan b/Example/Other/RigidWallMultiplier.supan index db9bd9539..22d7b6bda 100644 --- a/Example/Other/RigidWallMultiplier.supan +++ b/Example/Other/RigidWallMultiplier.supan @@ -29,7 +29,7 @@ analyze # Displacement: # 0 -0.6000 -0.6857 # Resistance: -# 0 -3.9429e+01 -1.4211e-14 +# 0 -3.9429e+01 -3.5527e-15 # # Node 3: # Coordinate: @@ -37,7 +37,7 @@ analyze # Displacement: # 0 -1.0000 -0.2571 # Resistance: -# 0 8.5714e+00 -1.7764e-14 +# 0 8.5714e+00 -4.4409e-15 peek node 2 3 exit \ No newline at end of file diff --git a/Example/Section/Fibre1D.supan b/Example/Section/Fibre1D.supan index c47e66d2c..c934eeaf9 100644 --- a/Example/Section/Fibre1D.supan +++ b/Example/Section/Fibre1D.supan @@ -29,7 +29,7 @@ converger RelIncreDisp 1 1E-10 10 1 analyze -# 0.6388 -0.6049 +# 0.6388 -0.6049 peek node 2 peek element 2 diff --git a/Example/Section/Fibre2D.supan b/Example/Section/Fibre2D.supan index 7e832aa01..7e6721f8d 100644 --- a/Example/Section/Fibre2D.supan +++ b/Example/Section/Fibre2D.supan @@ -36,11 +36,11 @@ converger RelIncreDisp 1 1E-10 20 1 analyze # Node 1: -# 0 0 +# 0 0 # Displacement: -# 1.4815e-02 1.0000e-04 +# 1.4815e-02 1.0000e-04 # Resistance: -# 1.7817e-06 3.6439e+08 +# 1.7817e-06 3.6439e+08 peek node 1 peek element 1 diff --git a/Example/Section/HSection2D.supan b/Example/Section/HSection2D.supan index 29ec4e999..2c1b469d3 100644 --- a/Example/Section/HSection2D.supan +++ b/Example/Section/HSection2D.supan @@ -24,11 +24,11 @@ converger RelIncreDisp 1 1E-10 20 1 analyze # Node 1: -# 0 0 +# 0 0 # Displacement: -# 6.9176e-20 1.0000e-04 +# 6.9176e-20 1.0000e-04 # Resistance: -# -1.8190e-12 1.6055e+06 +# -1.8190e-12 1.6055e+06 peek node 1 peek element 1 diff --git a/Example/Section/ISection2D.supan b/Example/Section/ISection2D.supan index b62e8aa01..8e96dc185 100644 --- a/Example/Section/ISection2D.supan +++ b/Example/Section/ISection2D.supan @@ -24,11 +24,11 @@ converger RelIncreDisp 1 1E-10 20 1 analyze # Node 1: -# 0 0 +# 0 0 # Displacement: -# -0.0140 0.0003 +# -0.0140 0.0003 # Resistance: -# -1.0914e-11 7.6590e+06 +# -1.0914e-11 7.6590e+06 peek node 1 peek element 1 diff --git a/Example/Section/ISection3D.supan b/Example/Section/ISection3D.supan index 4b4afa53a..ef07c6252 100644 --- a/Example/Section/ISection3D.supan +++ b/Example/Section/ISection3D.supan @@ -30,11 +30,11 @@ converger RelIncreDisp 1 1E-10 20 1 analyze # Node 1: -# 0 0 0 +# 0 0 0 # Displacement: -# -3.2546e-03 3.0000e-04 -2.8666e-20 +# -3.2546e-03 3.0000e-04 -2.8666e-20 # Resistance: -# -7.1054e-14 8.2438e+06 -2.9363e-12 +# -7.1054e-14 8.2438e+06 -2.9363e-12 peek node 1 peek element 1 2 3 4 diff --git a/Example/Section/TSection3D.supan b/Example/Section/TSection3D.supan index a61ca0f9e..6fe490147 100644 --- a/Example/Section/TSection3D.supan +++ b/Example/Section/TSection3D.supan @@ -24,11 +24,11 @@ converger RelIncreDisp 1 1E-10 20 1 analyze # Node 1: -# 0 0 0 +# 0 0 0 # Displacement: -# -1.6307e-02 3.0000e-04 1.4665e-21 +# -1.6307e-02 3.0000e-04 1.4665e-21 # Resistance: -# 6.8212e-12 1.5923e+06 5.8208e-11 +# 6.8212e-12 1.5923e+06 5.8208e-11 peek node 1 peek element 1 diff --git a/Example/Solver/Amplitude.supan b/Example/Solver/Amplitude.supan index 63cdbd6c0..486aeb2ab 100644 --- a/Example/Solver/Amplitude.supan +++ b/Example/Solver/Amplitude.supan @@ -13,9 +13,9 @@ material BilinearJ2 1 2E5 .2 100 .1 element C3D8 1 1 2 3 4 5 6 7 8 1 -fix 1 1 1 2 5 6 -fix 2 2 1 4 5 8 -fix 3 3 1 2 3 4 +fix2 1 1 1 2 5 6 +fix2 2 2 1 4 5 8 +fix2 3 3 1 2 3 4 amplitude Linear 1 10 amplitude Cosine 2 1 0 1 @@ -29,7 +29,7 @@ hdf5recorder 1 Node RF 7 hdf5recorder 2 Node U3 7 hdf5recorder 3 Amplitude 3 -step static 1 4 +step static 1 1 set fixed_step_size 1 set ini_step_size 1E-2 set symm_mat 0 @@ -41,11 +41,12 @@ analyze # peek element 1 # Node 5: -# 5.0000 -5.0000 5.0000 +# Coordinate: +# 5.0000 -5.0000 5.0000 # Displacement: -# 0 0 -2.5037e-33 +# 0 0 4.3368e-19 # Resistance: -# -4.1180e-03 4.1180e-03 -2.9649e+05 +# -6.1965e-12 5.7078e-12 -8.4303e+04 peek node 5 # save recorder 1 2 diff --git a/Example/Solver/BFGS.supan b/Example/Solver/BFGS.supan index 4a920f2f7..b63b3acf2 100644 --- a/Example/Solver/BFGS.supan +++ b/Example/Solver/BFGS.supan @@ -10,7 +10,7 @@ material MPF 2 100 5 .1 20. 18.5 .15 .01 7. true element T2D2 1 1 2 1 10 element T2D2 2 3 2 2 10 -fix 1 P 1 3 +fix2 1 P 1 3 step static 1 solver BFGS 1 @@ -24,11 +24,12 @@ converger RelIncreDisp 1 1E-8 50 1 analyze # Node 2: -# 4.0000 0 +# Coordinate: +# 4.0000 0 # Displacement: -# -3.5333 14.8500 +# -3.5333 14.8500 # Resistance: -# 1.3333e-06 1.0000e+02 +# -5.6843e-14 1.0000e+02 peek node 2 peek solver 1 diff --git a/Example/Solver/BICGSTAB.supan b/Example/Solver/BICGSTAB.supan index 9923da623..22e93ba04 100644 --- a/Example/Solver/BICGSTAB.supan +++ b/Example/Solver/BICGSTAB.supan @@ -22,18 +22,20 @@ converger RelIncreDisp 1 1E-8 20 1 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# -4.4409e-16 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# 0 1.0000 peek node 2 3 reset diff --git a/Example/Solver/Band.supan b/Example/Solver/Band.supan index 0f2c6909c..a88946b80 100644 --- a/Example/Solver/Band.supan +++ b/Example/Solver/Band.supan @@ -21,18 +21,20 @@ set sparse_mat 0 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# -2.2204e-16 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# 3.3307e-16 1.0000e+00 peek node 2 3 reset diff --git a/Example/Solver/BandSymm.supan b/Example/Solver/BandSymm.supan index 13bbabb96..ed9621aef 100644 --- a/Example/Solver/BandSymm.supan +++ b/Example/Solver/BandSymm.supan @@ -21,18 +21,20 @@ set sparse_mat 0 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# 2.2204e-16 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# -1.1102e-16 1.0000e+00 peek node 2 3 reset diff --git a/Example/Solver/BatheExplicit.supan b/Example/Solver/BatheExplicit.supan new file mode 100644 index 000000000..b9826e76a --- /dev/null +++ b/Example/Solver/BatheExplicit.supan @@ -0,0 +1,59 @@ +node 1 0 0 +node 2 0 -2 +node 3 0 -3 +node 4 0 -5 + +material Elastic1D 1 1E7 + +element T2D2 1 1 2 1 1 true +element T2D2 2 2 3 1 1 true +element T2D2 3 3 4 1 1 true + +element Mass 4 1 20 1 2 +element Mass 5 2 20 1 2 +element Mass 6 3 10 1 2 +element Mass 7 4 20 1 2 + +fix2 1 P 1 + +initial velocity 25 1 3 + +amplitude Constant 1 +cload 1 1 -200 2 2 +cload 2 1 -100 2 3 +cload 3 1 -200 2 4 + +hdf5recorder 1 Node U 2 3 4 + +step explicitdynamic 1 1 +set ini_step_size 1E-3 +set fixed_step_size 1 +set symm_mat 0 +set linear_system + +integrator BatheExplicit 1 .9 + +converger RelIncreAcc 1 1E-10 10 0 + +analyze + +# Node 4: +# Coordinate: +# 0 -5.0000 +# Displacement: +# 2.1257 9.3850 +# Resistance: +# 2.7423e+03 3.8522e+03 +# Velocity: +# 0.1839 0.6416 +# Acceleration: +# -1.3711e+02 -2.0261e+02 +peek node 2 3 4 + +peek integrator 1 + +# save recorder 1 + +reset +clear +exit \ No newline at end of file diff --git a/Example/Solver/BatheTwoStep.supan b/Example/Solver/BatheTwoStep.supan index c539e1548..d906c6246 100644 --- a/Example/Solver/BatheTwoStep.supan +++ b/Example/Solver/BatheTwoStep.supan @@ -24,7 +24,7 @@ cload 3 1 -200 2 4 hdf5recorder 1 Node U 2 3 4 -step dynamic 1 10 +step dynamic 1 1 set ini_step_size 1E-2 set fixed_step_size 1 set symm_mat 0 @@ -35,16 +35,17 @@ converger RelIncreDisp 1 1E-10 10 1 analyze -# Node 2: -# 0 -2.0000 +# Node 4: +# Coordinate: +# 0 -5.0000 # Displacement: -# -1.8160 2.8378 +# 1.3869 7.4423 # Resistance: -# 2.1074e+02 3.3268e+02 +# -13.9469 87.9345 # Velocity: -# -0.8459 -1.8286 +# 1.4165 1.3658 # Acceleration: -# -10.5371 -26.6342 +# 0.6973 -14.3967 peek node 2 3 4 peek integrator 1 diff --git a/Example/Solver/ElementalModal.supan b/Example/Solver/ElementalModal.supan index 8c7d3ea0c..ac98c30ad 100644 --- a/Example/Solver/ElementalModal.supan +++ b/Example/Solver/ElementalModal.supan @@ -7,7 +7,7 @@ material Elastic1D 1 10 .2 element EB21 1 1 2 12 1 1 -fix 1 P 1 +fix2 1 P 1 hdf5recorder 1 Node U2 2 @@ -20,7 +20,7 @@ displacement 1 0 1 2 2 converger AbsIncreDisp 1 1E-8 3 1 -step dynamic 2 4 +step dynamic 2 1 set ini_step_size .01 set fixed_step_size 1 @@ -31,15 +31,16 @@ converger RelIncreDisp 2 1E-10 3 1 analyze # Node 2: -# 1.0000 0 +# Coordinate: +# 1.0000 0 # Displacement: -# 0 -0.5387 -0.7480 +# 0 0.5646 0.7911 # Resistance: -# 0 -19.7634 2.4017 +# 0 20.2813 -2.2296 # Velocity: -# 0 2.1988 2.9521 +# 0 -4.8550 -6.5563 # Acceleration: -# 0 27.5128 38.2263 +# 0 -28.4218 -40.9698 peek node 2 exit \ No newline at end of file diff --git a/Example/Solver/FEAST.BAND.supan b/Example/Solver/FEAST.BAND.supan index 721e9a12c..e9f0290dc 100644 --- a/Example/Solver/FEAST.BAND.supan +++ b/Example/Solver/FEAST.BAND.supan @@ -20,7 +20,7 @@ set system_solver SPIKE analyze # Eigenvalues: -# 24.2601 +# 24.2601 peek eigenvalue exit \ No newline at end of file diff --git a/Example/Solver/FEAST.FULL.supan b/Example/Solver/FEAST.FULL.supan index 1c25072ab..7082372ea 100644 --- a/Example/Solver/FEAST.FULL.supan +++ b/Example/Solver/FEAST.FULL.supan @@ -21,7 +21,7 @@ set band_mat false analyze # Eigenvalues: -# 24.2601 +# 24.2601 peek eigenvalue exit \ No newline at end of file diff --git a/Example/Solver/FEAST.SPARSE.supan b/Example/Solver/FEAST.SPARSE.supan index 896fbb0a5..8dde5ca34 100644 --- a/Example/Solver/FEAST.SPARSE.supan +++ b/Example/Solver/FEAST.SPARSE.supan @@ -20,7 +20,7 @@ set sparse_mat 1 analyze # Eigenvalues: -# 24.2601 +# 24.2601 peek eigenvalue exit \ No newline at end of file diff --git a/Example/Solver/FGMRES.supan b/Example/Solver/FGMRES.supan index b6615c7d2..8be01f1e0 100644 --- a/Example/Solver/FGMRES.supan +++ b/Example/Solver/FGMRES.supan @@ -22,18 +22,20 @@ converger RelIncreDisp 1 1E-8 20 1 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# 1.8874e-15 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# -1.5543e-15 1.0000e+00 peek node 2 3 reset diff --git a/Example/Solver/Frequency.supan b/Example/Solver/Frequency.supan index 0fbf9f75d..390c99756 100644 --- a/Example/Solver/Frequency.supan +++ b/Example/Solver/Frequency.supan @@ -19,7 +19,7 @@ set band_mat 0 analyze # Eigenvalues: -# 30.0000 +# 30.0000 peek eigenvalue exit \ No newline at end of file diff --git a/Example/Solver/FullMat.supan b/Example/Solver/FullMat.supan index 5e02b3f4a..7fbde479a 100644 --- a/Example/Solver/FullMat.supan +++ b/Example/Solver/FullMat.supan @@ -21,18 +21,20 @@ set sparse_mat 0 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# -2.2204e-16 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# 3.3307e-16 1.0000e+00 peek node 2 3 reset diff --git a/Example/Solver/GMRES.supan b/Example/Solver/GMRES.supan index b9fa60173..8daacd904 100644 --- a/Example/Solver/GMRES.supan +++ b/Example/Solver/GMRES.supan @@ -22,18 +22,20 @@ converger RelIncreDisp 1 1E-8 20 1 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# -1.1102e-16 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# 4.4409e-16 1.0000e+00 peek node 2 3 reset diff --git a/Example/Solver/GSSSSOptimal.supan b/Example/Solver/GSSSSOptimal.supan index cf9495307..575cc51c6 100644 --- a/Example/Solver/GSSSSOptimal.supan +++ b/Example/Solver/GSSSSOptimal.supan @@ -9,7 +9,7 @@ element EB21 1 1 2 10 1 1 0 mass 2 2 10 1 -fix 1 P 1 +fix2 1 P 1 modifier Rayleigh 3 .2 .002 .0 .0 @@ -17,27 +17,28 @@ amplitude Tabular 1 EZ acceleration 2 1 .2 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size .05 set fixed_step_size 1 +set linear_system integrator GSSSSOptimal 1 .6 -converger RelIncreDisp 1 1E-4 4 1 +converger RelIncreDisp 1 1E-8 4 1 analyze # Node 2: # Coordinate: -# 0 1.0000 +# 0 1.0000 # Displacement: -# 0.0137 0 -0.0205 +# 0.0322 0 -0.0482 # Resistance: -# 4.5503 0 0.0169 +# 9.7595 0 0.0646 # Velocity: -# -0.0867 0 0.1299 +# 0.3811 0 -0.5858 # Acceleration: -# -0.3936 0 0.5893 +# 0.1950 0 -1.6547 peek node 2 peek integrator 1 diff --git a/Example/Solver/GSSSSU0.supan b/Example/Solver/GSSSSU0.supan index 0dc61d900..2d8022adc 100644 --- a/Example/Solver/GSSSSU0.supan +++ b/Example/Solver/GSSSSU0.supan @@ -9,7 +9,7 @@ element EB21 1 1 2 10 1 1 0 mass 2 2 10 1 -fix 1 P 1 +fix2 1 P 1 modifier Rayleigh 3 .2 .002 .0 .0 @@ -17,7 +17,7 @@ amplitude Tabular 1 EZ acceleration 2 1 .2 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size .05 set fixed_step_size 1 @@ -28,15 +28,16 @@ converger RelIncreDisp 1 1E-4 4 1 analyze # Node 2: -# 0 1.0000 +# Coordinate: +# 0 1.0000 # Displacement: -# 0.0130 0 -0.0195 +# 0.0333 0 -0.0498 # Resistance: -# 4.3305 0 0.0160 +# 10.0999 0 0.0672 # Velocity: -# -0.0917 0 0.1373 +# 0.3746 0 -0.5726 # Acceleration: -# -0.3770 0 0.5645 +# 0.3696 0 -1.6194 peek node 2 peek integrator 1 diff --git a/Example/Solver/GSSSSV0.supan b/Example/Solver/GSSSSV0.supan index 0a4187a0c..f380b902b 100644 --- a/Example/Solver/GSSSSV0.supan +++ b/Example/Solver/GSSSSV0.supan @@ -9,7 +9,7 @@ element EB21 1 1 2 10 1 1 0 mass 2 2 10 1 -fix 1 P 1 +fix2 1 P 1 modifier Rayleigh 3 .2 .002 .0 .0 @@ -17,7 +17,7 @@ amplitude Tabular 1 EZ acceleration 2 1 .2 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size .05 set fixed_step_size 1 @@ -28,15 +28,16 @@ converger RelIncreDisp 1 1E-4 4 1 analyze # Node 2: -# 0 1.0000 +# Coordinate: +# 0 1.0000 # Displacement: -# 0.0139 0 -0.0208 +# 0.0329 0 -0.0491 # Resistance: -# 4.5853 0 0.0170 +# 9.9584 0 0.0672 # Velocity: -# -0.0866 0 0.1297 +# 0.3755 0 -0.5750 # Acceleration: -# -0.4049 0 0.6062 +# 0.4176 0 -1.7028 peek node 2 peek integrator 1 diff --git a/Example/Solver/GeneralizedAlpha.supan b/Example/Solver/GeneralizedAlpha.supan index 892b95092..9a144ac16 100644 --- a/Example/Solver/GeneralizedAlpha.supan +++ b/Example/Solver/GeneralizedAlpha.supan @@ -9,7 +9,7 @@ element EB21 1 1 2 10 1 1 0 mass 2 2 10 1 -fix 1 P 1 +fix2 1 P 1 modifier Rayleigh 3 .2 .002 .0 .0 @@ -17,9 +17,10 @@ amplitude Tabular 1 EZ acceleration 2 1 .2 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size .05 set fixed_step_size 1 +set linear_system integrator GeneralizedAlpha 1 .6 @@ -28,15 +29,16 @@ converger RelIncreDisp 1 1E-4 4 1 analyze # Node 2: -# 0 1.0000 +# Coordinate: +# 0 1.0000 # Displacement: -# 0.0145 0 -0.0217 +# 0.0321 0 -0.0481 # Resistance: -# 4.3722 0 0.0162 +# 9.7372 0 0.0619 # Velocity: -# -0.0912 0 0.1365 +# 0.3776 0 -0.5775 # Acceleration: -# -0.4329 0 0.6482 +# 0.6774 0 -1.9255 peek node 2 peek integrator 1 diff --git a/Example/Solver/Lee.supan b/Example/Solver/Lee.supan index c479ad484..480d9c903 100644 --- a/Example/Solver/Lee.supan +++ b/Example/Solver/Lee.supan @@ -8,8 +8,8 @@ material Bilinear1D 1 100 1 .05 1. 1E-2 element T2D2 1 1 2 1 1 element Mass 2 2 100 1 -fix 1 1 1 -fix 2 2 1 2 +fix2 1 1 1 +fix2 2 2 1 2 hdf5recorder 1 Node U1 2 hdf5recorder 2 Node RF1 2 @@ -34,15 +34,15 @@ converger RelIncreDisp 2 1E-10 3 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0.0102 0 +# 0.0102 0 # Resistance: -# 0.8333 0 +# 0.8333 0 # Velocity: -# 0.0026 0 +# 0.0026 0 # Acceleration: -# -0.0085 0 +# -0.0085 0 peek node 2 # save recorder 1 2 diff --git a/Example/Solver/LeeElemental.supan b/Example/Solver/LeeElemental.supan index 65cec86f7..eafc16eb7 100644 --- a/Example/Solver/LeeElemental.supan +++ b/Example/Solver/LeeElemental.supan @@ -36,15 +36,15 @@ converger RelIncreDisp 2 1E-10 3 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0.0102 0 +# 0.0102 0 # Resistance: -# 0.8333 0 +# 0.8333 0 # Velocity: -# 0.0026 0 +# 0.0026 0 # Acceleration: -# -0.0085 0 +# -0.0085 0 peek node 2 # save recorder 1 2 diff --git a/Example/Solver/LeeFull.supan b/Example/Solver/LeeFull.supan index 3fad20c23..1eced6b98 100644 --- a/Example/Solver/LeeFull.supan +++ b/Example/Solver/LeeFull.supan @@ -5,7 +5,7 @@ node 2 1 0 material Elastic1D 1 100 -element T2D2 1 1 2 1 1 +element T2D2 1 1 2 1 1 1 1 1 element Mass 2 1 100 1 2 element Mass 3 2 100 1 2 @@ -51,15 +51,16 @@ converger RelIncreDisp 2 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# Coordinate: +# 1.0000 0 # Displacement: -# 0.1007 0 +# 0.0978 0 # Resistance: -# 10.0668 0 +# 8.5029 0 # Velocity: -# 0.0351 0 +# 0.0327 0 # Acceleration: -# -0.1077 0 +# -0.0905 0 peek node 2 # save recorder 1 2 3 diff --git a/Example/Solver/LeeFullElastic.supan b/Example/Solver/LeeFullElastic.supan new file mode 100644 index 000000000..adfeb065c --- /dev/null +++ b/Example/Solver/LeeFullElastic.supan @@ -0,0 +1,72 @@ +# A TEST MODEL FOR LEE DAMPING (FULL MODES) MODEL + +node 1 0 0 +node 2 1 0 + +material Elastic1D 1 100 + +element T2D2 1 1 2 1 1 +element Mass 2 1 100 1 2 +element Mass 3 2 100 1 2 + +fix 1 1 1 +fix 2 2 1 2 + +plainrecorder 1 Node U1 2 +plainrecorder 2 Node RF1 2 +plainrecorder 3 Node DF1 2 +plainrecorder 4 Node IF1 2 +plainrecorder 5 Node GDF1 2 +plainrecorder 6 Node GIF1 2 + +step static 1 1 +set ini_step_size .1 +set fixed_step_size 1 + +displacement 1 0 .2 1 2 + +converger RelIncreDisp 1 1E-10 3 1 + +step dynamic 2 6 +set ini_step_size .02 +set fixed_step_size 1 +set sparse_mat 1 +set system_solver SuperLU +set linear_system + +integrator LeeNewmarkFull 1 .25 .5 \ +-type0 .005 1 \ +-type1 .005 1 2 \ +-type1 .01 1 3 \ +-type2 .01 1 2 1 \ +-type2 .01 1 0 1 \ +-type2 .01 1 3 2 \ +-type3 .01 1 .5 \ +-type4 .01 1 0 1 1 0 2. \ +-type4 .01 1 1 1 1 0 2. \ +-type4 .01 1 0 1 1 1 2. \ +-type4 .01 1 1 1 1 1 2. + +converger RelIncreDisp 2 1E-10 10 1 + +analyze + +# Node 2: +# 1.0000 0 +# Displacement: +# 0.1007 0 +# Resistance: +# 10.0668 0 +# Velocity: +# 0.0351 0 +# Acceleration: +# -0.1077 0 +peek node 2 + +# save recorder 1 2 3 + +peek integrator 1 + +reset +clear +exit \ No newline at end of file diff --git a/Example/Solver/LeeFullIterative.supan b/Example/Solver/LeeFullIterative.supan index 8c0b61248..ee88705c1 100644 --- a/Example/Solver/LeeFullIterative.supan +++ b/Example/Solver/LeeFullIterative.supan @@ -53,15 +53,15 @@ converger RelIncreDisp 2 1E-10 10 1 analyze # Node 2: -# 1.0000 0 +# 1.0000 0 # Displacement: -# 0.1007 0 +# 0.1007 0 # Resistance: -# 10.0668 0 +# 10.0668 0 # Velocity: -# 0.0351 0 +# 0.0351 0 # Acceleration: -# -0.1077 0 +# -0.1077 0 peek node 2 # save recorder 1 2 3 diff --git a/Example/Solver/Momentum.supan b/Example/Solver/Momentum.supan index 975de1176..35cd46ac2 100644 --- a/Example/Solver/Momentum.supan +++ b/Example/Solver/Momentum.supan @@ -29,7 +29,7 @@ hdf5recorder 3 Global MOMENTUMY hdf5recorder 4 Node MOMENTUMX 2 3 4 hdf5recorder 5 Node MOMENTUMY 2 3 4 -step dynamic 1 10 +step dynamic 1 1 set ini_step_size 1E-3 set fixed_step_size 1 set symm_mat 0 @@ -40,6 +40,19 @@ converger RelIncreDisp 1 1E-10 10 1 analyze +# Node 4: +# Coordinate: +# 0 -5.0000 +# Displacement: +# 2.5699 8.2627 +# Resistance: +# 0 0 +# Velocity: +# 3.5857 1.0751 +# Acceleration: +# -1.7425 -15.4797 +peek node 4 + # save recorder 1 2 3 4 5 exit \ No newline at end of file diff --git a/Example/Solver/Newmark.supan b/Example/Solver/Newmark.supan index 3f978fb3c..28aed6237 100644 --- a/Example/Solver/Newmark.supan +++ b/Example/Solver/Newmark.supan @@ -27,7 +27,7 @@ modifier LumpedScale 1 1 modifier LumpedSimple 2 2 modifier Rayleigh 3 .2 .002 .0 .0 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 5E-2 set fixed_step_size true @@ -38,15 +38,16 @@ converger RelIncreDisp 1 1E-10 4 1 analyze # Node 2: -# 0 1.0000 +# Coordinate: +# 0 1.0000 # Displacement: -# -0.0325 0.0372 0.0258 +# 0.0333 -0.0002 -0.0497 # Resistance: -# -2.1849 -0.0045 0.0009 +# 9.5880 0.1065 -0.0060 # Velocity: -# -0.0444 -0.0010 0.0642 +# 0.3181 -0.0050 -0.4666 # Acceleration: -# 0.2173 0.0041 -0.3138 +# -0.0593 -0.0836 -0.1654 peek node 2 peek integrator 1 diff --git a/Example/Solver/Newton.supan b/Example/Solver/Newton.supan index fe85f7814..af62d5504 100644 --- a/Example/Solver/Newton.supan +++ b/Example/Solver/Newton.supan @@ -23,11 +23,12 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 4.0000 0 +# Coordinate: +# 4.0000 0 # Displacement: -# -3.5333 14.8500 +# -3.5333 14.8500 # Resistance: -# 1.3333e-06 1.0000e+02 +# 2.8422e-14 1.0000e+02 peek node 2 peek solver 1 diff --git a/Example/Solver/PARDISO.supan b/Example/Solver/PARDISO.supan index 004a8937f..62183b2df 100644 --- a/Example/Solver/PARDISO.supan +++ b/Example/Solver/PARDISO.supan @@ -24,18 +24,20 @@ converger RelIncreDisp 1 1E-8 20 1 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# -1.1102e-16 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# 0 1.0000 peek node 2 3 reset diff --git a/Example/Solver/Ramm.supan b/Example/Solver/Ramm.supan index e83ea2821..b380c09d2 100644 --- a/Example/Solver/Ramm.supan +++ b/Example/Solver/Ramm.supan @@ -46,16 +46,17 @@ step arclength 1 11 2 -20 criterion MinDisplacement 1 11 2 -5.5 -converger RelIncreDisp 1 1E-8 5 1 +converger RelIncreDisp 1 1E-10 4 1 analyze # Node 11: -# 2.0000 8.0000 +# Coordinate: +# 2.0000 8.0000 # Displacement: -# 5.9350 -5.5107 0.6801 +# 5.9364 -5.5092 0.6794 # Resistance: -# 1.4495e-10 -1.0647e+01 3.9790e-13 +# -1.2223e-09 -1.0142e+01 1.8190e-12 peek node 11 # save recorder 1 diff --git a/Example/Solver/RayleighNewmark.supan b/Example/Solver/RayleighNewmark.supan index 4edff4885..c36411b92 100644 --- a/Example/Solver/RayleighNewmark.supan +++ b/Example/Solver/RayleighNewmark.supan @@ -9,7 +9,7 @@ element EB21 1 1 2 10 1 1 false mass 2 2 10 1 -fix 1 P 1 +fix2 1 P 1 amplitude Tabular 1 EZ @@ -17,7 +17,7 @@ acceleration 2 1 .2 1 2 hdf5recorder 1 Node U 2 -step dynamic 1 30 +step dynamic 1 1 set ini_step_size .02 set fixed_step_size 1 set sparse_mat 1 @@ -29,15 +29,16 @@ converger RelIncreDisp 1 1E-10 10 1 analyze # Node 2: -# 0 1.0000 +# Coordinate: +# 0 1.0000 # Displacement: -# -4.9125e-07 0 7.3550e-07 +# 0.0277 0 -0.0414 # Resistance: -# -1.4820e-04 0 -5.4887e-07 +# 8.3919 0 0.0551 # Velocity: -# -6.1240e-06 0 9.1689e-06 +# 0.2979 0 -0.4467 # Acceleration: -# 2.3678e-05 0 -3.5452e-05 +# -0.3311 0 0.5499 peek node 2 peek integrator 1 diff --git a/Example/Solver/SHALLOW.ARC.supan b/Example/Solver/SHALLOW.ARC.supan index 6de238b23..3c736ad55 100644 --- a/Example/Solver/SHALLOW.ARC.supan +++ b/Example/Solver/SHALLOW.ARC.supan @@ -61,11 +61,11 @@ converger RelIncreDisp 1 1E-4 4 1 analyze # Node 7: -# 4.0223 0.8396 +# 4.0223 0.8396 # Displacement: -# 0.0168 -1.8050 0.1679 +# 0.0168 -1.8050 0.1679 # Resistance: -# -1.4197e-10 -1.8196e+02 -2.2510e-11 +# -1.4197e-10 -1.8196e+02 -2.2510e-11 peek node 7 # save recorder 1 diff --git a/Example/Solver/SuperLU.supan b/Example/Solver/SuperLU.supan index 0c81047f6..4beed7857 100644 --- a/Example/Solver/SuperLU.supan +++ b/Example/Solver/SuperLU.supan @@ -22,18 +22,20 @@ converger RelIncreDisp 1 1E-8 20 1 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# 5.5511e-16 1.0000e+00 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# -6.6613e-16 1.0000e+00 peek node 2 3 reset diff --git a/Example/Solver/SupportMotion.supan b/Example/Solver/SupportMotion.supan index a6eeb364b..ed07cc8f2 100644 --- a/Example/Solver/SupportMotion.supan +++ b/Example/Solver/SupportMotion.supan @@ -23,7 +23,7 @@ disable load 3 hdf5recorder 1 Node U1 1 2 -step dynamic 1 1E-1 +step dynamic 1 1 set ini_step_size 5E-2 set fixed_step_size true @@ -37,6 +37,17 @@ precheck analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -1.3590e-01 8.7663e-19 4.3882e-02 +# Resistance: +# -9.0734e+00 8.4743e-16 -1.4851e-01 +# Velocity: +# -3.2678e-01 6.4794e-18 6.3610e-01 +# Acceleration: +# 7.0548e-01 -8.4457e-16 5.5854e+00 peek node 1 2 clear diff --git a/Example/Solver/SupportMotionA.supan b/Example/Solver/SupportMotionA.supan index 61d47830e..527524ac2 100644 --- a/Example/Solver/SupportMotionA.supan +++ b/Example/Solver/SupportMotionA.supan @@ -20,7 +20,7 @@ supportacceleration 2 1 .2 1 1 hdf5recorder 1 Node U1 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 5E-2 set fixed_step_size true @@ -30,6 +30,17 @@ converger RelIncreDisp 1 1E-10 4 1 analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -1.2663e-01 -3.3999e-20 5.4934e-02 +# Resistance: +# -1.1005e+01 -3.0365e-16 -9.0983e-03 +# Velocity: +# -2.4057e-01 -3.0945e-18 5.0382e-01 +# Acceleration: +# 1.0892e+00 4.6432e-17 -8.2083e+00 peek node 1 2 exit \ No newline at end of file diff --git a/Example/Solver/SupportMotionBatheA.supan b/Example/Solver/SupportMotionBatheA.supan index 002f90930..c39950881 100644 --- a/Example/Solver/SupportMotionBatheA.supan +++ b/Example/Solver/SupportMotionBatheA.supan @@ -20,16 +20,27 @@ supportacceleration 2 1 .2 1 1 hdf5recorder 1 Node U1 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 1E-2 set fixed_step_size true -integrator BatheTwoStep 1 +integrator BatheTwoStep 1 .2 .1 converger RelIncreDisp 1 1E-10 4 1 analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -1.4767e-01 9.6137e-20 5.6186e-02 +# Resistance: +# -1.1508e+01 -2.6611e-16 -1.3546e-01 +# Velocity: +# -3.2038e-01 8.3929e-19 6.0273e-01 +# Acceleration: +# 1.0702e+00 3.5935e-16 2.9524e+00 peek node 1 2 # save recorder 1 diff --git a/Example/Solver/SupportMotionBatheV.supan b/Example/Solver/SupportMotionBatheV.supan index 3f02f863a..81db28674 100644 --- a/Example/Solver/SupportMotionBatheV.supan +++ b/Example/Solver/SupportMotionBatheV.supan @@ -20,16 +20,27 @@ supportvelocity 2 1 .2 1 1 hdf5recorder 1 Node U1 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 1E-2 set fixed_step_size true -integrator BatheTwoStep 1 +integrator BatheTwoStep 1 .2 converger RelIncreDisp 1 1E-10 4 1 analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -3.0649e-01 2.4660e-18 6.0049e-01 +# Resistance: +# -1.1055e+02 4.0147e-15 4.7749e+00 +# Velocity: +# 1.1059e+00 -1.6142e-16 -7.6754e-03 +# Acceleration: +# 1.3227e+01 -7.0120e-15 -2.8148e+02 peek node 1 2 # save recorder 1 diff --git a/Example/Solver/SupportMotionD.supan b/Example/Solver/SupportMotionD.supan index 25850b43a..9a6736742 100644 --- a/Example/Solver/SupportMotionD.supan +++ b/Example/Solver/SupportMotionD.supan @@ -20,7 +20,7 @@ supportdisplacement 2 1 .2 1 1 hdf5recorder 1 Node U1 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 5E-2 set fixed_step_size true @@ -30,6 +30,17 @@ converger RelIncreDisp 1 1E-10 4 1 analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# 1.0892e+00 2.1307e-17 -8.2083e+00 +# Resistance: +# -4.7306e+03 1.2609e-13 -3.1861e+03 +# Velocity: +# 8.4889e+00 1.0537e-15 1.2187e+03 +# Acceleration: +# -2.5940e+03 -8.3703e-14 1.1645e+06 peek node 1 2 exit \ No newline at end of file diff --git a/Example/Solver/SupportMotionGAE.supan b/Example/Solver/SupportMotionGAE.supan new file mode 100644 index 000000000..c0a79d151 --- /dev/null +++ b/Example/Solver/SupportMotionGAE.supan @@ -0,0 +1,50 @@ +# A TEST MODEL FOR SUPPORTMOTION INTEGRATOR + +node 1 0 0 +node 2 0 1 + +material Elastic1D 1 100 + +section Rectangle2D 1 12 1 1 + +element B21 1 1 2 1 6 + +element MassPoint2D 2 1 11 11 +element MassPoint2D 3 2 22 22 + +fix2 1 2 1 +fix2 2 3 1 + +amplitude Tabular 1 EZ + +supportacceleration 2 1 .2 1 1 + +hdf5recorder 1 Node U1 1 2 + +step explicitdynamic 1 1 +set ini_step_size 1E-3 +set fixed_step_size true +set linear_system + +integrator GeneralisedAlphaExplicit 1 .6 + +converger AbsIncreAcc 1 1E-10 4 0 + +analyze + +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -1.1062e-01 5.2650e-20 -4.1510e-02 +# Resistance: +# -3.2336e+01 -5.2215e-16 -2.0319e+01 +# Velocity: +# 9.9993e-03 4.1454e-19 7.5238e-02 +# Acceleration: +# 1.4698e+00 1.7756e-17 9.2360e-01 +peek node 1 2 + +peek integrator 1 + +exit \ No newline at end of file diff --git a/Example/Solver/SupportMotionGSSSSA.supan b/Example/Solver/SupportMotionGSSSSA.supan index 156c733c6..86678db83 100644 --- a/Example/Solver/SupportMotionGSSSSA.supan +++ b/Example/Solver/SupportMotionGSSSSA.supan @@ -11,8 +11,8 @@ element B21 1 1 2 1 6 mass 2 2 10 1 -fix 1 2 1 -fix 2 3 1 +fix2 1 2 1 +fix2 2 3 1 amplitude Tabular 1 EZ @@ -20,7 +20,7 @@ supportacceleration 2 1 .2 1 1 hdf5recorder 1 Node U1 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 1E-2 set fixed_step_size true @@ -30,8 +30,21 @@ converger RelIncreDisp 1 1E-10 4 1 analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -1.4281e-01 6.3982e-19 5.7279e-02 +# Resistance: +# -1.1586e+01 6.2720e-16 -6.4907e-02 +# Velocity: +# -3.0230e-01 -6.6369e-20 6.0667e-01 +# Acceleration: +# 1.0789e+00 -2.6537e-16 -1.8176e+00 peek node 1 2 +peek integrator 1 + # save recorder 1 exit \ No newline at end of file diff --git a/Example/Solver/SupportMotionGSSSSV.supan b/Example/Solver/SupportMotionGSSSSV.supan index c2fa424d0..d683a8f5b 100644 --- a/Example/Solver/SupportMotionGSSSSV.supan +++ b/Example/Solver/SupportMotionGSSSSV.supan @@ -11,8 +11,8 @@ element B21 1 1 2 1 6 mass 2 2 10 1 -fix 1 2 1 -fix 2 3 1 +fix2 1 2 1 +fix2 2 3 1 amplitude Tabular 1 EZ @@ -20,7 +20,7 @@ supportvelocity 2 1 .2 1 1 hdf5recorder 1 Node U1 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 1E-2 set fixed_step_size true @@ -30,6 +30,17 @@ converger RelIncreDisp 1 1E-10 4 1 analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -3.0497e-01 2.9176e-18 6.1005e-01 +# Resistance: +# -1.0254e+02 3.2517e-15 9.7347e+00 +# Velocity: +# 1.0789e+00 -6.4215e-18 -1.8176e+00 +# Acceleration: +# 1.5064e+01 5.2593e-16 -7.4586e+02 peek node 1 2 # save recorder 1 diff --git a/Example/Solver/SupportMotionOALTSA.supan b/Example/Solver/SupportMotionOALTSA.supan new file mode 100644 index 000000000..c35c9cbf8 --- /dev/null +++ b/Example/Solver/SupportMotionOALTSA.supan @@ -0,0 +1,50 @@ +# A TEST MODEL FOR SUPPORTMOTION INTEGRATOR + +node 1 0 0 +node 2 0 1 + +material Elastic1D 1 100 .1 + +section Rectangle2D 1 12 1 1 + +element B21 1 1 2 1 6 + +mass 2 2 10 1 + +fix 1 2 1 +fix 2 3 1 + +amplitude Tabular 1 EZ + +supportacceleration 2 1 .2 1 1 + +hdf5recorder 1 Node U1 1 2 + +step dynamic 1 1 +set ini_step_size 1E-2 +set fixed_step_size true + +integrator OALTS 1 .8 + +converger RelIncreDisp 1 1E-10 4 1 + +analyze + +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -1.4318e-01 -1.0060e-19 5.7916e-02 +# Resistance: +# -1.1795e+01 -4.2724e-16 -1.0590e-01 +# Velocity: +# -3.0084e-01 -3.6004e-18 5.6560e-01 +# Acceleration: +# 1.1147e+00 4.7558e-16 1.2138e-01 +peek node 1 2 + +peek integrator 1 + +# save recorder 1 + +exit \ No newline at end of file diff --git a/Example/Solver/SupportMotionOALTSV.supan b/Example/Solver/SupportMotionOALTSV.supan new file mode 100644 index 000000000..04e690cb1 --- /dev/null +++ b/Example/Solver/SupportMotionOALTSV.supan @@ -0,0 +1,50 @@ +# A TEST MODEL FOR SUPPORTMOTION INTEGRATOR + +node 1 0 0 +node 2 0 1 + +material Elastic1D 1 100 .1 + +section Rectangle2D 1 12 1 1 + +element B21 1 1 2 1 6 + +mass 2 2 10 1 + +fix 1 2 1 +fix 2 3 1 + +amplitude Tabular 1 EZ + +supportvelocity 2 1 .2 1 1 + +hdf5recorder 1 Node U1 1 2 + +step dynamic 1 1 +set ini_step_size 1E-2 +set fixed_step_size true + +integrator OALTS 1 .5 + +converger RelIncreDisp 1 1E-10 4 1 + +analyze + +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -3.0098e-01 2.5323e-18 6.0977e-01 +# Resistance: +# -1.0322e+02 5.8638e-15 9.3686e+00 +# Velocity: +# 1.1299e+00 1.2093e-16 -3.3723e+00 +# Acceleration: +# 1.4985e+01 5.2548e-15 -6.8136e+02 +peek node 1 2 + +# save recorder 1 + +reset +clear +exit \ No newline at end of file diff --git a/Example/Solver/SupportMotionTchamwaA.supan b/Example/Solver/SupportMotionTchamwaA.supan new file mode 100644 index 000000000..c5713987d --- /dev/null +++ b/Example/Solver/SupportMotionTchamwaA.supan @@ -0,0 +1,48 @@ +# A TEST MODEL FOR SUPPORTMOTION INTEGRATOR + +node 1 0 0 +node 2 0 1 + +material Elastic1D 1 100 + +section Rectangle2D 1 12 1 1 + +element B21 1 1 2 1 6 + +element MassPoint2D 2 1 11 11 +element MassPoint2D 3 2 22 22 + +fix2 1 2 1 +fix2 2 3 1 + +amplitude Tabular 1 EZ + +supportacceleration 2 1 .2 1 1 + +hdf5recorder 1 Node U1 1 2 + +step explicitdynamic 1 1 +set ini_step_size 1E-3 +set fixed_step_size true +set linear_system + +integrator Tchamwa 1 .6 + +converger AbsIncreAcc 1 1E-10 4 0 + +analyze + +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -1.1056e-01 -7.4072e-20 -4.1445e-02 +# Resistance: +# -3.2250e+01 -4.2338e-16 -2.0270e+01 +# Velocity: +# 8.8158e-03 -5.2947e-19 7.4556e-02 +# Acceleration: +# 1.4659e+00 1.9244e-17 9.2135e-01 +peek node 1 2 + +exit \ No newline at end of file diff --git a/Example/Solver/SupportMotionV.supan b/Example/Solver/SupportMotionV.supan index 60556dbc7..16bfd4673 100644 --- a/Example/Solver/SupportMotionV.supan +++ b/Example/Solver/SupportMotionV.supan @@ -20,7 +20,7 @@ supportvelocity 2 1 .2 1 1 hdf5recorder 1 Node U1 1 2 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 5E-2 set fixed_step_size true @@ -30,6 +30,17 @@ converger RelIncreDisp 1 1E-10 4 1 analyze +# Node 2: +# Coordinate: +# 0 1.0000 +# Displacement: +# -2.4057e-01 1.8586e-19 5.0382e-01 +# Resistance: +# -9.6640e+01 1.9287e-15 2.0618e+00 +# Velocity: +# 1.0892e+00 -8.5672e-17 -8.2083e+00 +# Acceleration: +# 8.4889e+00 -4.2898e-15 1.2187e+03 peek node 1 2 exit \ No newline at end of file diff --git a/Example/Solver/SymmPack.supan b/Example/Solver/SymmPack.supan index 2c82a6cf1..ce2ae3120 100644 --- a/Example/Solver/SymmPack.supan +++ b/Example/Solver/SymmPack.supan @@ -21,18 +21,20 @@ set sparse_mat 0 analyze # Node 2: +# Coordinate: # 0.5000 0 # Displacement: # 0.1498 0.6417 # Resistance: -# 7.5862E-09 1.0000E+00 +# 0 1.0000 # # Node 3: +# Coordinate: # 0.5000 0.5000 # Displacement: # -0.1665 0.6081 # Resistance: -# -6.2069E-09 1.0000E+00 +# -1.1102e-16 1.0000e+00 peek node 2 3 reset diff --git a/Example/Solver/TabularSpline.supan b/Example/Solver/TabularSpline.supan index b511ff705..7ace6e05c 100644 --- a/Example/Solver/TabularSpline.supan +++ b/Example/Solver/TabularSpline.supan @@ -24,7 +24,7 @@ modifier LumpedScale 1 1 modifier LumpedSimple 2 2 modifier Rayleigh 3 .2 .002 .0 .0 -step dynamic 1 25 +step dynamic 1 1 set ini_step_size 1E-2 set fixed_step_size true @@ -36,15 +36,15 @@ analyze # Node 2: # Coordinate: -# 0 1.0000 +# 0 1.0000 # Displacement: -# -0.0280 0.0486 0.0139 +# 0.0323 -0.0002 -0.0487 # Resistance: -# -1.1386 -0.0005 0.0004 +# 9.8137 0.0958 -0.0076 # Velocity: -# -0.0558 -0.0011 0.0798 +# 0.3721 -0.0057 -0.5421 # Acceleration: -# 0.1214 0.0007 -0.1740 +# -0.3431 -0.0771 1.3227 peek node 2 peek integrator 1 diff --git a/Example/Solver/Tchamwa.supan b/Example/Solver/Tchamwa.supan new file mode 100644 index 000000000..533bbfcbe --- /dev/null +++ b/Example/Solver/Tchamwa.supan @@ -0,0 +1,59 @@ +node 1 0 0 +node 2 0 -2 +node 3 0 -3 +node 4 0 -5 + +material Elastic1D 1 1E7 + +element T2D2 1 1 2 1 1 true +element T2D2 2 2 3 1 1 true +element T2D2 3 3 4 1 1 true + +element Mass 4 1 10 1 2 +element Mass 5 2 20 1 2 +element Mass 6 3 30 1 2 +element Mass 7 4 40 1 2 + +fix2 1 P 1 + +initial velocity 25 1 3 + +amplitude Constant 1 +cload 1 1 -200 2 2 +cload 2 1 -100 2 3 +cload 3 1 -200 2 4 + +hdf5recorder 1 Node U 2 3 4 + +step explicitdynamic 1 1 +set ini_step_size 1E-3 +set fixed_step_size 1 +set symm_mat 0 +set linear_system + +integrator Tchamwa 1 + +converger RelIncreAcc 1 1E-10 10 0 + +analyze + +# Node 2: +# Coordinate: +# 0 -2.0000 +# Displacement: +# -1.8359 2.7823 +# Resistance: +# 7.6666e+03 -1.1685e+04 +# Velocity: +# 0.4647 -1.8641 +# Acceleration: +# -3.8333e+02 5.7425e+02 +peek node 1 2 3 4 + +peek integrator 1 + +# save recorder 1 + +reset +clear +exit \ No newline at end of file diff --git a/Example/Solver/WilsonPenzien.supan b/Example/Solver/WilsonPenzien.supan index 96e890438..99a4d5cd7 100644 --- a/Example/Solver/WilsonPenzien.supan +++ b/Example/Solver/WilsonPenzien.supan @@ -30,15 +30,16 @@ converger RelIncreDisp 2 1E-10 3 1 analyze # Node 2: -# 1.0000 0 +# Coordinate: +# 1.0000 0 # Displacement: -# -2.2334e-19 1.3646e-01 2.0368e-01 +# 2.2366e-19 1.3646e-01 2.0368e-01 # Resistance: -# -2.6801e-17 4.1541e+00 -4.0213e-02 +# 2.6840e-17 4.1541e+00 -4.0213e-02 # Velocity: -# -6.9977e-20 -8.2125e-01 -1.5051e+00 +# 2.1517e-18 -8.2125e-01 -1.5051e+00 # Acceleration: -# 3.9755e-16 -3.4511e+00 1.4432e+01 +# -3.2944e-16 -3.4511e+00 1.4432e+01 peek node 2 peek integrator 1 diff --git a/Example/Solver/mNewton.supan b/Example/Solver/mNewton.supan index 475c39114..922a3a4ae 100644 --- a/Example/Solver/mNewton.supan +++ b/Example/Solver/mNewton.supan @@ -14,7 +14,7 @@ fix 1 P 1 3 step static 1 solver mNewton 1 -set ini_step_size .1 +set ini_step_size 1E-2 cload 1 0 100 2 2 @@ -23,11 +23,12 @@ converger RelIncreDisp 1 1E-8 10 1 analyze # Node 2: -# 4.0000 0 +# Coordinate: +# 4.0000 0 # Displacement: -# -3.5333 14.8500 +# -3.5333 14.8500 # Resistance: -# 1.3333e-06 1.0000e+02 +# -2.1095e-09 1.0000e+02 peek node 2 peek solver 1 diff --git a/Example/Solver/mumps.supan b/Example/Solver/mumps.supan index 5e7c22485..cf94685d0 100644 --- a/Example/Solver/mumps.supan +++ b/Example/Solver/mumps.supan @@ -23,11 +23,11 @@ converger RelIncreDisp 1 1E-10 4 1 analyze # Node 2: -# 4.0000 0 +# 4.0000 0 # Displacement: -# 1.2000 0 +# 1.2000 0 # Resistance: -# 1.0000e+02 0 +# 1.0000e+02 0 peek node 2 peek element 1 diff --git a/Include/armadillo/armadillo b/Include/armadillo/armadillo index 0c2e51d6c..a17ddb7b1 100644 --- a/Include/armadillo/armadillo +++ b/Include/armadillo/armadillo @@ -53,11 +53,29 @@ #endif #if defined(ARMA_USE_TBB_ALLOC) - #include + #if defined(__has_include) + #if __has_include() + #include + #else + #undef ARMA_USE_TBB_ALLOC + #pragma message ("WARNING: use of TBB alloc disabled; tbb/scalable_allocator.h header not found") + #endif + #else + #include + #endif #endif #if defined(ARMA_USE_MKL_ALLOC) - #include + #if defined(__has_include) + #if __has_include() + #include + #else + #undef ARMA_USE_MKL_ALLOC + #pragma message ("WARNING: use of MKL alloc disabled; mkl_service.h header not found") + #endif + #else + #include + #endif #endif #if ( defined(__unix__) || defined(__unix) || defined(_POSIX_C_SOURCE) || (defined(__APPLE__) && defined(__MACH__)) ) && !defined(_WIN32) @@ -69,7 +87,16 @@ #if defined(ARMA_USE_OPENMP) - #include + #if defined(__has_include) + #if __has_include() + #include + #else + #undef ARMA_USE_OPENMP + #pragma message ("WARNING: use of OpenMP disabled; omp.h header not found") + #endif + #else + #include + #endif #endif @@ -205,7 +232,6 @@ namespace arma #include "armadillo_bits/eop_core_bones.hpp" #include "armadillo_bits/eglue_core_bones.hpp" - #include "armadillo_bits/GenSpecialiser.hpp" #include "armadillo_bits/Gen_bones.hpp" #include "armadillo_bits/GenCube_bones.hpp" @@ -274,6 +300,7 @@ namespace arma #include "armadillo_bits/op_wishrnd_bones.hpp" #include "armadillo_bits/op_roots_bones.hpp" #include "armadillo_bits/op_cond_bones.hpp" + #include "armadillo_bits/op_rcond_bones.hpp" #include "armadillo_bits/op_sp_plus_bones.hpp" #include "armadillo_bits/op_sp_minus_bones.hpp" #include "armadillo_bits/op_powmat_bones.hpp" @@ -303,6 +330,7 @@ namespace arma #include "armadillo_bits/glue_affmul_bones.hpp" #include "armadillo_bits/glue_mvnrnd_bones.hpp" #include "armadillo_bits/glue_quantile_bones.hpp" + #include "armadillo_bits/glue_powext_bones.hpp" #include "armadillo_bits/gmm_misc_bones.hpp" #include "armadillo_bits/gmm_diag_bones.hpp" @@ -503,7 +531,7 @@ namespace arma #include "armadillo_bits/fn_inplace_trans.hpp" #include "armadillo_bits/fn_randi.hpp" #include "armadillo_bits/fn_randg.hpp" - #include "armadillo_bits/fn_cond.hpp" + #include "armadillo_bits/fn_cond_rcond.hpp" #include "armadillo_bits/fn_normalise.hpp" #include "armadillo_bits/fn_clamp.hpp" #include "armadillo_bits/fn_expmat.hpp" @@ -532,6 +560,7 @@ namespace arma #include "armadillo_bits/fn_randperm.hpp" #include "armadillo_bits/fn_quantile.hpp" #include "armadillo_bits/fn_powmat.hpp" + #include "armadillo_bits/fn_powext.hpp" #include "armadillo_bits/fn_speye.hpp" #include "armadillo_bits/fn_spones.hpp" @@ -706,6 +735,7 @@ namespace arma #include "armadillo_bits/op_wishrnd_meat.hpp" #include "armadillo_bits/op_roots_meat.hpp" #include "armadillo_bits/op_cond_meat.hpp" + #include "armadillo_bits/op_rcond_meat.hpp" #include "armadillo_bits/op_sp_plus_meat.hpp" #include "armadillo_bits/op_sp_minus_meat.hpp" #include "armadillo_bits/op_powmat_meat.hpp" @@ -735,6 +765,7 @@ namespace arma #include "armadillo_bits/glue_affmul_meat.hpp" #include "armadillo_bits/glue_mvnrnd_meat.hpp" #include "armadillo_bits/glue_quantile_meat.hpp" + #include "armadillo_bits/glue_powext_meat.hpp" #include "armadillo_bits/gmm_misc_meat.hpp" #include "armadillo_bits/gmm_diag_meat.hpp" diff --git a/Include/armadillo/armadillo_bits/Col_bones.hpp b/Include/armadillo/armadillo_bits/Col_bones.hpp index 8e5606593..d91c89124 100644 --- a/Include/armadillo/armadillo_bits/Col_bones.hpp +++ b/Include/armadillo/armadillo_bits/Col_bones.hpp @@ -88,7 +88,7 @@ class Col : public Mat inline Col(const subview_cube& X); inline Col& operator=(const subview_cube& X); - arma_cold inline mat_injector operator<<(const eT val); + arma_deprecated inline mat_injector operator<<(const eT val); arma_inline arma_warn_unused const Op,op_htrans> t() const; arma_inline arma_warn_unused const Op,op_htrans> ht() const; @@ -138,7 +138,9 @@ class Col : public Mat template inline void shed_rows(const Base& indices); - inline void insert_rows(const uword row_num, const uword N, const bool set_to_zero = true); + arma_deprecated inline void insert_rows(const uword row_num, const uword N, const bool set_to_zero); + inline void insert_rows(const uword row_num, const uword N); + template inline void insert_rows(const uword row_num, const Base& X); diff --git a/Include/armadillo/armadillo_bits/Col_meat.hpp b/Include/armadillo/armadillo_bits/Col_meat.hpp index 9383bdbf9..c59ab0298 100644 --- a/Include/armadillo/armadillo_bits/Col_meat.hpp +++ b/Include/armadillo/armadillo_bits/Col_meat.hpp @@ -51,12 +51,11 @@ Col::Col(const uword in_n_elem) { arma_extra_debug_sigprint(); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Col::constructor: zeroing memory"); arrayops::fill_zeros(Mat::memptr(), Mat::n_elem); } - #endif } @@ -70,12 +69,11 @@ Col::Col(const uword in_n_rows, const uword in_n_cols) Mat::init_warm(in_n_rows, in_n_cols); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Col::constructor: zeroing memory"); arrayops::fill_zeros(Mat::memptr(), Mat::n_elem); } - #endif } @@ -89,12 +87,11 @@ Col::Col(const SizeMat& s) Mat::init_warm(s.n_rows, s.n_cols); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Col::constructor: zeroing memory"); arrayops::fill_zeros(Mat::memptr(), Mat::n_elem); } - #endif } @@ -314,10 +311,9 @@ Col::Col(const std::vector& x) { arma_extra_debug_sigprint_this(this); - if(x.size() > 0) - { - arrayops::copy( Mat::memptr(), &(x[0]), uword(x.size()) ); - } + const uword N = uword(x.size()); + + if(N > 0) { arrayops::copy( Mat::memptr(), &(x[0]), N ); } } @@ -330,12 +326,11 @@ Col::operator=(const std::vector& x) { arma_extra_debug_sigprint(); - Mat::init_warm(uword(x.size()), 1); + const uword N = uword(x.size()); - if(x.size() > 0) - { - arrayops::copy( Mat::memptr(), &(x[0]), uword(x.size()) ); - } + Mat::init_warm(N, 1); + + if(N > 0) { arrayops::copy( Mat::memptr(), &(x[0]), N ); } return *this; } @@ -345,11 +340,13 @@ Col::operator=(const std::vector& x) template inline Col::Col(const std::initializer_list& list) - : Mat(arma_vec_indicator(), 1) + : Mat(arma_vec_indicator(), uword(list.size()), 1, 1) { - arma_extra_debug_sigprint(); + arma_extra_debug_sigprint_this(this); - (*this).operator=(list); + const uword N = uword(list.size()); + + if(N > 0) { arrayops::copy( Mat::memptr(), list.begin(), N ); } } @@ -361,14 +358,11 @@ Col::operator=(const std::initializer_list& list) { arma_extra_debug_sigprint(); - Mat tmp(list); + const uword N = uword(list.size()); - arma_debug_check( ((tmp.n_elem > 0) && (tmp.is_vec() == false)), "Mat::init(): requested size is not compatible with column vector layout" ); + Mat::init_warm(N, 1); - access::rw(tmp.n_rows) = tmp.n_elem; - access::rw(tmp.n_cols) = 1; - - (*this).steal_mem(tmp); + if(N > 0) { arrayops::copy( Mat::memptr(), list.begin(), N ); } return *this; } @@ -424,15 +418,7 @@ Col::operator=(Col&& X) { arma_extra_debug_sigprint(arma_str::format("this = %x X = %x") % this % &X); - (*this).steal_mem(X); - - if( (X.mem_state == 0) && (X.n_alloc <= arma_config::mat_prealloc) && (this != &X) ) - { - access::rw(X.n_rows) = 0; - access::rw(X.n_cols) = 1; - access::rw(X.n_elem) = 0; - access::rw(X.mem) = nullptr; - } + (*this).steal_mem(X, true); return *this; } @@ -625,7 +611,7 @@ Col::operator=(const subview_cube& X) template inline -arma_cold +arma_deprecated mat_injector< Col > Col::operator<<(const eT val) { @@ -1068,15 +1054,28 @@ Col::shed_rows(const Base& indices) -//! insert N rows at the specified row position, -//! optionally setting the elements of the inserted rows to zero template +arma_deprecated inline void Col::insert_rows(const uword row_num, const uword N, const bool set_to_zero) { arma_extra_debug_sigprint(); + arma_ignore(set_to_zero); + + (*this).insert_rows(row_num, N); + } + + + +template +inline +void +Col::insert_rows(const uword row_num, const uword N) + { + arma_extra_debug_sigprint(); + const uword t_n_rows = Mat::n_rows; const uword A_n_rows = row_num; @@ -1085,30 +1084,26 @@ Col::insert_rows(const uword row_num, const uword N, const bool set_to_zero) // insertion at row_num == n_rows is in effect an append operation arma_debug_check_bounds( (row_num > t_n_rows), "Col::insert_rows(): index out of bounds" ); - if(N > 0) + if(N == 0) { return; } + + Col out(t_n_rows + N, arma_nozeros_indicator()); + + eT* out_mem = out.memptr(); + const eT* t_mem = (*this).memptr(); + + if(A_n_rows > 0) { - Col out(t_n_rows + N, arma_nozeros_indicator()); - - eT* out_mem = out.memptr(); - const eT* t_mem = (*this).memptr(); - - if(A_n_rows > 0) - { - arrayops::copy( out_mem, t_mem, A_n_rows ); - } - - if(B_n_rows > 0) - { - arrayops::copy( &(out_mem[row_num + N]), &(t_mem[row_num]), B_n_rows ); - } - - if(set_to_zero) - { - arrayops::inplace_set( &(out_mem[row_num]), eT(0), N ); - } - - Mat::steal_mem(out); + arrayops::copy( out_mem, t_mem, A_n_rows ); } + + if(B_n_rows > 0) + { + arrayops::copy( &(out_mem[row_num + N]), &(t_mem[row_num]), B_n_rows ); + } + + arrayops::fill_zeros( &(out_mem[row_num]), N ); + + Mat::steal_mem(out); } @@ -1236,7 +1231,7 @@ Col::fixed::fixed() { arma_extra_debug_sigprint_this(this); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Col::fixed::constructor: zeroing memory"); @@ -1244,7 +1239,6 @@ Col::fixed::fixed() arrayops::inplace_set_fixed( mem_use, eT(0) ); } - #endif } diff --git a/Include/armadillo/armadillo_bits/Cube_bones.hpp b/Include/armadillo/armadillo_bits/Cube_bones.hpp index d42fbb0c2..c15e1ec26 100644 --- a/Include/armadillo/armadillo_bits/Cube_bones.hpp +++ b/Include/armadillo/armadillo_bits/Cube_bones.hpp @@ -39,13 +39,13 @@ class Cube : public BaseCube< eT, Cube > typedef eT elem_type; //!< the type of elements stored in the cube typedef typename get_pod_type::result pod_type; //!< if eT is std::complex, pod_type is T; otherwise pod_type is eT - const uword n_rows; //!< number of rows in each slice (read-only) - const uword n_cols; //!< number of columns in each slice (read-only) - const uword n_elem_slice; //!< number of elements in each slice (read-only) - const uword n_slices; //!< number of slices in the cube (read-only) - const uword n_elem; //!< number of elements in the cube (read-only) - const uword n_alloc; //!< number of allocated elements (read-only); NOTE: n_alloc can be 0, even if n_elem > 0 - const uword mem_state; + const uword n_rows; //!< number of rows in each slice (read-only) + const uword n_cols; //!< number of columns in each slice (read-only) + const uword n_elem_slice; //!< number of elements in each slice (read-only) + const uword n_slices; //!< number of slices in the cube (read-only) + const uword n_elem; //!< number of elements in the cube (read-only) + const uword n_alloc; //!< number of allocated elements (read-only); NOTE: n_alloc can be 0, even if n_elem > 0 + const uword mem_state; // mem_state = 0: normal cube which manages its own memory // mem_state = 1: use auxiliary memory until a size change @@ -57,10 +57,27 @@ class Cube : public BaseCube< eT, Cube > protected: - arma_aligned const Mat** const mat_ptrs; + using mat_type = Mat; + + #if defined(ARMA_USE_OPENMP) + using raw_mat_ptr_type = mat_type*; + using atomic_mat_ptr_type = mat_type*; + #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) + using raw_mat_ptr_type = mat_type*; + using atomic_mat_ptr_type = std::atomic; + #else + using raw_mat_ptr_type = mat_type*; + using atomic_mat_ptr_type = mat_type*; + #endif + + atomic_mat_ptr_type* mat_ptrs = nullptr; + + #if (!defined(ARMA_DONT_USE_STD_MUTEX)) + mutable std::mutex mat_mutex; // required for slice() + #endif - arma_align_mem Mat* mat_ptrs_local[ Cube_prealloc::mat_ptrs_size ]; - arma_align_mem eT mem_local[ Cube_prealloc::mem_n_elem ]; // local storage, for small cubes + arma_aligned atomic_mat_ptr_type mat_ptrs_local[ Cube_prealloc::mat_ptrs_size ]; + arma_align_mem eT mem_local[ Cube_prealloc::mem_n_elem ]; // local storage, for small cubes public: @@ -124,7 +141,7 @@ class Cube : public BaseCube< eT, Cube > inline Mat& slice(const uword in_slice); inline const Mat& slice(const uword in_slice) const; - + arma_inline subview_cube rows(const uword in_row1, const uword in_row2); arma_inline const subview_cube rows(const uword in_row1, const uword in_row2) const; @@ -201,13 +218,18 @@ class Cube : public BaseCube< eT, Cube > template inline void shed_slices(const Base& indices); - inline void insert_rows(const uword row_num, const uword N, const bool set_to_zero = true); - inline void insert_cols(const uword row_num, const uword N, const bool set_to_zero = true); - inline void insert_slices(const uword slice_num, const uword N, const bool set_to_zero = true); + arma_deprecated inline void insert_rows(const uword row_num, const uword N, const bool set_to_zero); + arma_deprecated inline void insert_cols(const uword row_num, const uword N, const bool set_to_zero); + arma_deprecated inline void insert_slices(const uword slice_num, const uword N, const bool set_to_zero); + + inline void insert_rows(const uword row_num, const uword N); + inline void insert_cols(const uword row_num, const uword N); + inline void insert_slices(const uword slice_num, const uword N); template inline void insert_rows(const uword row_num, const BaseCube& X); template inline void insert_cols(const uword col_num, const BaseCube& X); template inline void insert_slices(const uword slice_num, const BaseCube& X); + template inline void insert_slices(const uword slice_num, const Base& X); template inline Cube(const GenCube& X); @@ -423,7 +445,8 @@ class Cube : public BaseCube< eT, Cube > inline void swap(Cube& B); - inline void steal_mem(Cube& X); //!< don't use this unless you're writing code internal to Armadillo + inline void steal_mem(Cube& X); //!< don't use this unless you're writing code internal to Armadillo + inline void steal_mem(Cube& X, const bool is_move); //!< don't use this unless you're writing code internal to Armadillo template class fixed; @@ -439,6 +462,9 @@ class Cube : public BaseCube< eT, Cube > inline void delete_mat(); inline void create_mat(); + inline Mat* create_mat_ptr(const uword in_slice) const; + inline Mat* get_mat_ptr(const uword in_slice) const; + friend class glue_join; friend class op_reshape; friend class op_resize; @@ -465,8 +491,8 @@ class Cube::fixed : public Cube static constexpr bool use_extra = (fixed_n_elem > Cube_prealloc::mem_n_elem); - arma_aligned Mat* mat_ptrs_local_extra[ (fixed_n_slices > Cube_prealloc::mat_ptrs_size) ? fixed_n_slices : 1 ]; - arma_align_mem eT mem_local_extra [ use_extra ? fixed_n_elem : 1 ]; + arma_aligned atomic_mat_ptr_type mat_ptrs_local_extra[ (fixed_n_slices > Cube_prealloc::mat_ptrs_size) ? fixed_n_slices : 1 ]; + arma_align_mem eT mem_local_extra[ use_extra ? fixed_n_elem : 1 ]; arma_inline void mem_setup(); diff --git a/Include/armadillo/armadillo_bits/Cube_meat.hpp b/Include/armadillo/armadillo_bits/Cube_meat.hpp index 5b1838b65..b8b24b001 100644 --- a/Include/armadillo/armadillo_bits/Cube_meat.hpp +++ b/Include/armadillo/armadillo_bits/Cube_meat.hpp @@ -35,11 +35,7 @@ Cube::~Cube() } // try to expose buggy user code that accesses deleted objects - if(arma_config::debug) - { - access::rw(mem) = nullptr; - access::rw(mat_ptrs) = nullptr; - } + if(arma_config::debug) { access::rw(mem) = nullptr; } arma_type_check(( is_supported_elem_type::value == false )); } @@ -57,7 +53,6 @@ Cube::Cube() , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); } @@ -76,18 +71,16 @@ Cube::Cube(const uword in_n_rows, const uword in_n_cols, const uword in_n_sl , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); init_cold(); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Cube::constructor: zeroing memory"); arrayops::fill_zeros(memptr(), n_elem); } - #endif } @@ -103,18 +96,16 @@ Cube::Cube(const SizeCube& s) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); init_cold(); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Cube::constructor: zeroing memory"); arrayops::fill_zeros(memptr(), n_elem); } - #endif } @@ -132,7 +123,6 @@ Cube::Cube(const uword in_n_rows, const uword in_n_cols, const uword in_n_sl , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -160,7 +150,6 @@ Cube::Cube(const SizeCube& s, const arma_initmode_indicator&) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -188,7 +177,6 @@ Cube::Cube(const uword in_n_rows, const uword in_n_cols, const uword in_n_sl , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -216,7 +204,6 @@ Cube::Cube(const SizeCube& s, const fill::fill_class&) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -244,7 +231,6 @@ Cube::Cube(const uword in_n_rows, const uword in_n_cols, const uword in_n_sl , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -266,7 +252,6 @@ Cube::Cube(const SizeCube& s, const fill::scalar_holder f) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -288,12 +273,10 @@ Cube::Cube(Cube&& in_cube) , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { - arma_extra_debug_sigprint_this(this); arma_extra_debug_sigprint(arma_str::format("this = %x in_cube = %x") % this % &in_cube); - (*this).steal_mem(in_cube); + (*this).steal_mem(in_cube, true); } @@ -305,7 +288,7 @@ Cube::operator=(Cube&& in_cube) { arma_extra_debug_sigprint(arma_str::format("this = %x in_cube = %x") % this % &in_cube); - (*this).steal_mem(in_cube); + (*this).steal_mem(in_cube, true); return *this; } @@ -370,12 +353,14 @@ Cube::init_warm(const uword in_n_rows, const uword in_n_cols, const uword in bool err_state = false; char* err_msg = nullptr; - arma_debug_set_error( err_state, err_msg, (t_mem_state == 3), "Cube::init(): size is fixed and hence cannot be changed" ); + const char* error_message_1 = "Cube::init(): size is fixed and hence cannot be changed"; + + arma_debug_set_error( err_state, err_msg, (t_mem_state == 3), error_message_1 ); #if defined(ARMA_64BIT_WORD) - const char* error_message = "Cube::init(): requested size is too large"; + const char* error_message_2 = "Cube::init(): requested size is too large"; #else - const char* error_message = "Cube::init(): requested size is too large; suggest to enable ARMA_64BIT_WORD"; + const char* error_message_2 = "Cube::init(): requested size is too large; suggest to enable ARMA_64BIT_WORD"; #endif arma_debug_set_error @@ -387,7 +372,7 @@ Cube::init_warm(const uword in_n_rows, const uword in_n_cols, const uword in ? ( (double(in_n_rows) * double(in_n_cols) * double(in_n_slices)) > double(ARMA_MAX_UWORD) ) : false ), - error_message + error_message_2 ); arma_debug_check(err_state, err_msg); @@ -542,14 +527,23 @@ Cube::delete_mat() if((n_slices > 0) && (mat_ptrs != nullptr)) { - for(uword uslice = 0; uslice < n_slices; ++uslice) + for(uword s=0; s < n_slices; ++s) { - if(mat_ptrs[uslice] != nullptr) { delete access::rw(mat_ptrs[uslice]); } + raw_mat_ptr_type mat_ptr = raw_mat_ptr_type(mat_ptrs[s]); // explicit cast to indicate load from std::atomic*> + + if(mat_ptr != nullptr) + { + arma_extra_debug_print( arma_str::format("Cube::delete_mat(): destroying matrix %d") % s ); + delete mat_ptr; + mat_ptrs[s] = nullptr; + } } if( (mem_state <= 2) && (n_slices > Cube_prealloc::mat_ptrs_size) ) { + arma_extra_debug_print("Cube::delete_mat(): freeing mat_ptrs array"); delete [] mat_ptrs; + mat_ptrs = nullptr; } } } @@ -563,31 +557,110 @@ Cube::create_mat() { arma_extra_debug_sigprint(); - if(n_slices == 0) + if(n_slices == 0) { mat_ptrs = nullptr; return; } + + if(mem_state <= 2) { - access::rw(mat_ptrs) = nullptr; + if(n_slices <= Cube_prealloc::mat_ptrs_size) + { + arma_extra_debug_print("Cube::create_mat(): using local memory for mat_ptrs array"); + + mat_ptrs = mat_ptrs_local; + } + else + { + arma_extra_debug_print("Cube::create_mat(): allocating mat_ptrs array"); + + mat_ptrs = new(std::nothrow) atomic_mat_ptr_type[n_slices]; + + arma_check_bad_alloc( (mat_ptrs == nullptr), "Cube::create_mat(): out of memory" ); + } } - else + + for(uword s=0; s < n_slices; ++s) { mat_ptrs[s] = nullptr; } + } + + + +template +inline +Mat* +Cube::create_mat_ptr(const uword in_slice) const + { + arma_extra_debug_sigprint(); + + arma_extra_debug_print( arma_str::format("Cube::create_mat_ptr(): creating matrix %d") % in_slice ); + + const eT* mat_mem = (n_elem_slice > 0) ? slice_memptr(in_slice) : nullptr; + + Mat* mat_ptr = new(std::nothrow) Mat('j', mat_mem, n_rows, n_cols); + + return mat_ptr; + } + + + +template +inline +Mat* +Cube::get_mat_ptr(const uword in_slice) const + { + arma_extra_debug_sigprint(); + + raw_mat_ptr_type mat_ptr = nullptr; + + #if defined(ARMA_USE_OPENMP) { - if(mem_state <= 2) + #pragma omp atomic read + mat_ptr = mat_ptrs[in_slice]; + } + #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) + { + mat_ptr = mat_ptrs[in_slice].load(); + } + #else + { + mat_ptr = mat_ptrs[in_slice]; + } + #endif + + if(mat_ptr == nullptr) + { + #if defined(ARMA_USE_OPENMP) { - if(n_slices <= Cube_prealloc::mat_ptrs_size) + #pragma omp critical (arma_Cube_mat_ptrs) { - access::rw(mat_ptrs) = const_cast< const Mat** >(mat_ptrs_local); - } - else - { - access::rw(mat_ptrs) = new(std::nothrow) const Mat*[n_slices]; + #pragma omp atomic read + mat_ptr = mat_ptrs[in_slice]; + + if(mat_ptr == nullptr) { mat_ptr = create_mat_ptr(in_slice); } - arma_check_bad_alloc( (mat_ptrs == nullptr), "Cube::create_mat(): out of memory" ); + #pragma omp atomic write + mat_ptrs[in_slice] = mat_ptr; } } - - for(uword uslice = 0; uslice < n_slices; ++uslice) + #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) { - mat_ptrs[uslice] = nullptr; + const std::lock_guard lock(mat_mutex); + + mat_ptr = mat_ptrs[in_slice].load(); + + if(mat_ptr == nullptr) { mat_ptr = create_mat_ptr(in_slice); } + + mat_ptrs[in_slice].store(mat_ptr); + } + #else + { + mat_ptr = create_mat_ptr(in_slice); + + mat_ptrs[in_slice] = mat_ptr; } + #endif + + arma_check_bad_alloc( (mat_ptr == nullptr), "Cube::get_mat_ptr(): out of memory" ); } + + return mat_ptr; } @@ -682,7 +755,6 @@ Cube::Cube(const Cube& x) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); arma_extra_debug_sigprint(arma_str::format("this = %x in_cube = %x") % this % &x); @@ -728,11 +800,10 @@ Cube::Cube(eT* aux_mem, const uword aux_n_rows, const uword aux_n_cols, cons , n_alloc ( 0 ) , mem_state ( copy_aux_mem ? 0 : (strict ? 2 : 1) ) , mem ( copy_aux_mem ? nullptr : aux_mem ) - , mat_ptrs ( nullptr ) { arma_extra_debug_sigprint_this(this); - if(prealloc_mat) { arma_debug_warn_level(3, "Cube::Cube(): parameter 'prealloc_mat' ignored as it's no longer used"); } + arma_ignore(prealloc_mat); // kept only for compatibility with old user code if(copy_aux_mem) { @@ -761,7 +832,6 @@ Cube::Cube(const eT* aux_mem, const uword aux_n_rows, const uword aux_n_cols , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -857,7 +927,6 @@ Cube::Cube , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -878,7 +947,6 @@ Cube::Cube(const subview_cube& X) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -989,7 +1057,6 @@ Cube::Cube(const subview_cube_slices& X) , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -1154,14 +1221,7 @@ Cube::slice(const uword in_slice) arma_debug_check_bounds( (in_slice >= n_slices), "Cube::slice(): index out of bounds" ); - if(mat_ptrs[in_slice] == nullptr) - { - const eT* ptr = (n_elem_slice > 0) ? slice_memptr(in_slice) : nullptr; - - mat_ptrs[in_slice] = new Mat('j', ptr, n_rows, n_cols); - } - - return const_cast< Mat& >( *(mat_ptrs[in_slice]) ); + return *(get_mat_ptr(in_slice)); } @@ -1176,14 +1236,7 @@ Cube::slice(const uword in_slice) const arma_debug_check_bounds( (in_slice >= n_slices), "Cube::slice(): index out of bounds" ); - if(mat_ptrs[in_slice] == nullptr) - { - const eT* ptr = (n_elem_slice > 0) ? slice_memptr(in_slice) : nullptr; - - mat_ptrs[in_slice] = new Mat('j', ptr, n_rows, n_cols); - } - - return *(mat_ptrs[in_slice]); + return *(get_mat_ptr(in_slice)); } @@ -2261,12 +2314,27 @@ Cube::shed_slices(const Base& indices) template +arma_deprecated inline void Cube::insert_rows(const uword row_num, const uword N, const bool set_to_zero) { arma_extra_debug_sigprint(); + arma_ignore(set_to_zero); + + (*this).insert_rows(row_num, N); + } + + + +template +inline +void +Cube::insert_rows(const uword row_num, const uword N) + { + arma_extra_debug_sigprint(); + const uword t_n_rows = n_rows; const uword A_n_rows = row_num; @@ -2275,38 +2343,49 @@ Cube::insert_rows(const uword row_num, const uword N, const bool set_to_zero // insertion at row_num == n_rows is in effect an append operation arma_debug_check_bounds( (row_num > t_n_rows), "Cube::insert_rows(): index out of bounds" ); - if(N > 0) + if(N == 0) { return; } + + Cube out(t_n_rows + N, n_cols, n_slices, arma_nozeros_indicator()); + + if(A_n_rows > 0) { - Cube out(t_n_rows + N, n_cols, n_slices, arma_nozeros_indicator()); - - if(A_n_rows > 0) - { - out.rows(0, A_n_rows-1) = rows(0, A_n_rows-1); - } - - if(B_n_rows > 0) - { - out.rows(row_num + N, t_n_rows + N - 1) = rows(row_num, t_n_rows-1); - } - - if(set_to_zero) - { - out.rows(row_num, row_num + N - 1).zeros(); - } - - steal_mem(out); + out.rows(0, A_n_rows-1) = rows(0, A_n_rows-1); + } + + if(B_n_rows > 0) + { + out.rows(row_num + N, t_n_rows + N - 1) = rows(row_num, t_n_rows-1); } + + out.rows(row_num, row_num + N - 1).zeros(); + + steal_mem(out); } template +arma_deprecated inline void Cube::insert_cols(const uword col_num, const uword N, const bool set_to_zero) { arma_extra_debug_sigprint(); + arma_ignore(set_to_zero); + + (*this).insert_cols(col_num, N); + } + + + +template +inline +void +Cube::insert_cols(const uword col_num, const uword N) + { + arma_extra_debug_sigprint(); + const uword t_n_cols = n_cols; const uword A_n_cols = col_num; @@ -2315,40 +2394,49 @@ Cube::insert_cols(const uword col_num, const uword N, const bool set_to_zero // insertion at col_num == n_cols is in effect an append operation arma_debug_check_bounds( (col_num > t_n_cols), "Cube::insert_cols(): index out of bounds" ); - if(N > 0) + if(N == 0) { return; } + + Cube out(n_rows, t_n_cols + N, n_slices, arma_nozeros_indicator()); + + if(A_n_cols > 0) { - Cube out(n_rows, t_n_cols + N, n_slices, arma_nozeros_indicator()); - - if(A_n_cols > 0) - { - out.cols(0, A_n_cols-1) = cols(0, A_n_cols-1); - } - - if(B_n_cols > 0) - { - out.cols(col_num + N, t_n_cols + N - 1) = cols(col_num, t_n_cols-1); - } - - if(set_to_zero) - { - out.cols(col_num, col_num + N - 1).zeros(); - } - - steal_mem(out); + out.cols(0, A_n_cols-1) = cols(0, A_n_cols-1); + } + + if(B_n_cols > 0) + { + out.cols(col_num + N, t_n_cols + N - 1) = cols(col_num, t_n_cols-1); } + + out.cols(col_num, col_num + N - 1).zeros(); + + steal_mem(out); } -//! insert N slices at the specified slice position, -//! optionally setting the elements of the inserted slices to zero template +arma_deprecated inline void Cube::insert_slices(const uword slice_num, const uword N, const bool set_to_zero) { arma_extra_debug_sigprint(); + arma_ignore(set_to_zero); + + (*this).insert_slices(slice_num, N); + } + + + +template +inline +void +Cube::insert_slices(const uword slice_num, const uword N) + { + arma_extra_debug_sigprint(); + const uword t_n_slices = n_slices; const uword A_n_slices = slice_num; @@ -2357,32 +2445,28 @@ Cube::insert_slices(const uword slice_num, const uword N, const bool set_to_ // insertion at slice_num == n_slices is in effect an append operation arma_debug_check_bounds( (slice_num > t_n_slices), "Cube::insert_slices(): index out of bounds" ); - if(N > 0) + if(N == 0) { return; } + + Cube out(n_rows, n_cols, t_n_slices + N, arma_nozeros_indicator()); + + if(A_n_slices > 0) { - Cube out(n_rows, n_cols, t_n_slices + N, arma_nozeros_indicator()); - - if(A_n_slices > 0) - { - out.slices(0, A_n_slices-1) = slices(0, A_n_slices-1); - } - - if(B_n_slices > 0) - { - out.slices(slice_num + N, t_n_slices + N - 1) = slices(slice_num, t_n_slices-1); - } - - if(set_to_zero) - { - //out.slices(slice_num, slice_num + N - 1).zeros(); - - for(uword i=slice_num; i < (slice_num + N); ++i) - { - arrayops::fill_zeros(out.slice_memptr(i), out.n_elem_slice); - } - } - - steal_mem(out); + out.slices(0, A_n_slices-1) = slices(0, A_n_slices-1); + } + + if(B_n_slices > 0) + { + out.slices(slice_num + N, t_n_slices + N - 1) = slices(slice_num, t_n_slices-1); + } + + //out.slices(slice_num, slice_num + N - 1).zeros(); + + for(uword i=slice_num; i < (slice_num + N); ++i) + { + arrayops::fill_zeros(out.slice_memptr(i), out.n_elem_slice); } + + steal_mem(out); } @@ -2414,24 +2498,23 @@ Cube::insert_rows(const uword row_num, const BaseCube& X) "Cube::insert_rows(): given object has incompatible dimensions" ); - if(N > 0) + if(N == 0) { return; } + + Cube out(t_n_rows + N, n_cols, n_slices, arma_nozeros_indicator()); + + if(A_n_rows > 0) { - Cube out(t_n_rows + N, n_cols, n_slices, arma_nozeros_indicator()); - - if(A_n_rows > 0) - { - out.rows(0, A_n_rows-1) = rows(0, A_n_rows-1); - } - - if(B_n_rows > 0) - { - out.rows(row_num + N, t_n_rows + N - 1) = rows(row_num, t_n_rows - 1); - } - - out.rows(row_num, row_num + N - 1) = C; - - steal_mem(out); + out.rows(0, A_n_rows-1) = rows(0, A_n_rows-1); + } + + if(B_n_rows > 0) + { + out.rows(row_num + N, t_n_rows + N - 1) = rows(row_num, t_n_rows - 1); } + + out.rows(row_num, row_num + N - 1) = C; + + steal_mem(out); } @@ -2463,24 +2546,23 @@ Cube::insert_cols(const uword col_num, const BaseCube& X) "Cube::insert_cols(): given object has incompatible dimensions" ); - if(N > 0) + if(N == 0) { return; } + + Cube out(n_rows, t_n_cols + N, n_slices, arma_nozeros_indicator()); + + if(A_n_cols > 0) { - Cube out(n_rows, t_n_cols + N, n_slices, arma_nozeros_indicator()); - - if(A_n_cols > 0) - { - out.cols(0, A_n_cols-1) = cols(0, A_n_cols-1); - } - - if(B_n_cols > 0) - { - out.cols(col_num + N, t_n_cols + N - 1) = cols(col_num, t_n_cols - 1); - } - - out.cols(col_num, col_num + N - 1) = C; - - steal_mem(out); + out.cols(0, A_n_cols-1) = cols(0, A_n_cols-1); + } + + if(B_n_cols > 0) + { + out.cols(col_num + N, t_n_cols + N - 1) = cols(col_num, t_n_cols - 1); } + + out.cols(col_num, col_num + N - 1) = C; + + steal_mem(out); } @@ -2514,24 +2596,40 @@ Cube::insert_slices(const uword slice_num, const BaseCube& X) "Cube::insert_slices(): given object has incompatible dimensions" ); - if(N > 0) + if(N == 0) { return; } + + Cube out(n_rows, n_cols, t_n_slices + N, arma_nozeros_indicator()); + + if(A_n_slices > 0) { - Cube out(n_rows, n_cols, t_n_slices + N, arma_nozeros_indicator()); - - if(A_n_slices > 0) - { - out.slices(0, A_n_slices-1) = slices(0, A_n_slices-1); - } - - if(B_n_slices > 0) - { - out.slices(slice_num + N, t_n_slices + N - 1) = slices(slice_num, t_n_slices - 1); - } - - out.slices(slice_num, slice_num + N - 1) = C; - - steal_mem(out); + out.slices(0, A_n_slices-1) = slices(0, A_n_slices-1); } + + if(B_n_slices > 0) + { + out.slices(slice_num + N, t_n_slices + N - 1) = slices(slice_num, t_n_slices - 1); + } + + out.slices(slice_num, slice_num + N - 1) = C; + + steal_mem(out); + } + + + +template +template +inline +void +Cube::insert_slices(const uword slice_num, const Base& X) + { + arma_extra_debug_sigprint(); + + const quasi_unwrap U(X.get_ref()); + + const Cube C(const_cast(U.M.memptr()), U.M.n_rows, U.M.n_cols, uword(1), false, true); + + (*this).insert_slices(slice_num, C); } @@ -2549,7 +2647,6 @@ Cube::Cube(const GenCube& X) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -2650,7 +2747,6 @@ Cube::Cube(const OpCube& X) , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -2764,7 +2860,6 @@ Cube::Cube(const eOpCube& X) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -2892,7 +2987,6 @@ Cube::Cube(const mtOpCube& X) , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -2989,7 +3083,6 @@ Cube::Cube(const GlueCube& X) , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -3105,7 +3198,6 @@ Cube::Cube(const eGlueCube& X) , n_alloc() , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -3239,7 +3331,6 @@ Cube::Cube(const mtGlueCube& X) , n_alloc(0) , mem_state(0) , mem() - , mat_ptrs(nullptr) { arma_extra_debug_sigprint_this(this); @@ -5159,10 +5250,24 @@ Cube::steal_mem(Cube& x) { arma_extra_debug_sigprint(); + (*this).steal_mem(x, false); + } + + + +template +inline +void +Cube::steal_mem(Cube& x, const bool is_move) + { + arma_extra_debug_sigprint(); + if(this == &x) { return; } - if( (mem_state <= 1) && ( (x.n_alloc > Cube_prealloc::mem_n_elem) || (x.mem_state == 1) ) ) + if( (mem_state <= 1) && ( (x.n_alloc > Cube_prealloc::mem_n_elem) || (x.mem_state == 1) || (is_move && (x.mem_state == 2)) ) ) { + arma_extra_debug_print("Cube::steal_mem(): stealing memory"); + reset(); const uword x_n_slices = x.n_slices; @@ -5178,16 +5283,20 @@ Cube::steal_mem(Cube& x) if(x_n_slices > Cube_prealloc::mat_ptrs_size) { - access::rw( mat_ptrs) = x.mat_ptrs; - access::rw(x.mat_ptrs) = nullptr; + arma_extra_debug_print("Cube::steal_mem(): stealing mat_ptrs array"); + + mat_ptrs = x.mat_ptrs; + x.mat_ptrs = nullptr; } else { - access::rw(mat_ptrs) = const_cast< const Mat** >(mat_ptrs_local); + arma_extra_debug_print("Cube::steal_mem(): copying mat_ptrs array"); + + mat_ptrs = mat_ptrs_local; for(uword i=0; i < x_n_slices; ++i) { - mat_ptrs[i] = x.mat_ptrs[i]; + mat_ptrs[i] = raw_mat_ptr_type(x.mat_ptrs[i]); // cast required by std::atomic x.mat_ptrs[i] = nullptr; } } @@ -5203,7 +5312,14 @@ Cube::steal_mem(Cube& x) } else { + arma_extra_debug_print("Cube::steal_mem(): copying memory"); + (*this).operator=(x); + + if( (is_move) && (x.mem_state == 0) && (x.n_alloc <= Cube_prealloc::mem_n_elem) ) + { + x.reset(); + } } } @@ -5232,8 +5348,7 @@ Cube::fixed::mem_setup() access::rw(Cube::n_alloc) = 0; access::rw(Cube::mem_state) = 3; access::rw(Cube::mem) = (fixed_n_elem > Cube_prealloc::mem_n_elem) ? mem_local_extra : mem_local; - access::rw(Cube::mat_ptrs) = const_cast< const Mat** >( \ - (fixed_n_slices > Cube_prealloc::mat_ptrs_size) ? mat_ptrs_local_extra : mat_ptrs_local ); + Cube::mat_ptrs = (fixed_n_slices > Cube_prealloc::mat_ptrs_size) ? mat_ptrs_local_extra : mat_ptrs_local; create_mat(); } @@ -5247,7 +5362,7 @@ Cube::fixed::mem_setup() access::rw(Cube::n_alloc) = 0; access::rw(Cube::mem_state) = 3; access::rw(Cube::mem) = nullptr; - access::rw(Cube::mat_ptrs) = nullptr; + Cube::mat_ptrs = nullptr; } } @@ -5262,7 +5377,7 @@ Cube::fixed::fixed() mem_setup(); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Cube::fixed::constructor: zeroing memory"); @@ -5270,7 +5385,6 @@ Cube::fixed::fixed() arrayops::fill_zeros(mem_use, fixed_n_elem); } - #endif } @@ -5764,11 +5878,7 @@ Cube_aux::set_real(Cube< std::complex >& out, const BaseCube& X) const uword N = out.n_elem; - for(uword i=0; i( A[i], out_mem[i].imag() ); - } + for(uword i=0; i >& out, const BaseCube& X) for(uword col = 0; col < local_n_cols; ++col ) for(uword row = 0; row < local_n_rows; ++row ) { - (*out_mem) = std::complex( P.at(row,col,slice), (*out_mem).imag() ); + (*out_mem).real(P.at(row,col,slice)); out_mem++; } } @@ -5816,11 +5926,7 @@ Cube_aux::set_imag(Cube< std::complex >& out, const BaseCube& X) const uword N = out.n_elem; - for(uword i=0; i( out_mem[i].real(), A[i] ); - } + for(uword i=0; i >& out, const BaseCube& X) for(uword col = 0; col < local_n_cols; ++col ) for(uword row = 0; row < local_n_rows; ++row ) { - (*out_mem) = std::complex( (*out_mem).real(), P.at(row,col,slice) ); + (*out_mem).imag(P.at(row,col,slice)); out_mem++; } } diff --git a/Include/armadillo/armadillo_bits/GenCube_bones.hpp b/Include/armadillo/armadillo_bits/GenCube_bones.hpp index 965a168f6..3c6099f3d 100644 --- a/Include/armadillo/armadillo_bits/GenCube_bones.hpp +++ b/Include/armadillo/armadillo_bits/GenCube_bones.hpp @@ -20,11 +20,10 @@ //! @{ -//! support class for generator functions (eg. zeros, randu, randn, ...) +//! support class for generator functions (zeros, ones) template class GenCube : public BaseCube< eT, GenCube > - , public GenSpecialiser::yes, is_same_type::yes, is_same_type::yes, is_same_type::yes> { public: @@ -32,7 +31,7 @@ class GenCube typedef typename get_pod_type::result pod_type; static constexpr bool use_at = false; - static constexpr bool is_simple = (is_same_type::value) || (is_same_type::value); + static constexpr bool is_simple = (is_same_type::value) || (is_same_type::value); arma_aligned const uword n_rows; arma_aligned const uword n_cols; @@ -41,9 +40,9 @@ class GenCube arma_inline GenCube(const uword in_n_rows, const uword in_n_cols, const uword in_n_slices); arma_inline ~GenCube(); - arma_inline eT operator[] (const uword i) const; - arma_inline eT at (const uword row, const uword col, const uword slice) const; - arma_inline eT at_alt (const uword i) const; + arma_inline eT operator[] (const uword i) const; + arma_inline eT at (const uword r, const uword c, const uword s) const; + arma_inline eT at_alt (const uword i) const; inline void apply (Cube& out) const; inline void apply_inplace_plus (Cube& out) const; diff --git a/Include/armadillo/armadillo_bits/GenCube_meat.hpp b/Include/armadillo/armadillo_bits/GenCube_meat.hpp index 76257d18d..61735f3b9 100644 --- a/Include/armadillo/armadillo_bits/GenCube_meat.hpp +++ b/Include/armadillo/armadillo_bits/GenCube_meat.hpp @@ -47,7 +47,10 @@ arma_inline eT GenCube::operator[](const uword) const { - return (*this).generate(); + if(is_same_type::yes) { return eT(0); } + else if(is_same_type::yes) { return eT(1); } + + return eT(0); // prevent pedantic compiler warnings } @@ -57,7 +60,10 @@ arma_inline eT GenCube::at(const uword, const uword, const uword) const { - return (*this).generate(); + if(is_same_type::yes) { return eT(0); } + else if(is_same_type::yes) { return eT(1); } + + return eT(0); // prevent pedantic compiler warnings } @@ -67,7 +73,10 @@ arma_inline eT GenCube::at_alt(const uword) const { - return (*this).generate(); + if(is_same_type::yes) { return eT(0); } + else if(is_same_type::yes) { return eT(1); } + + return eT(0); // prevent pedantic compiler warnings } @@ -82,10 +91,8 @@ GenCube::apply(Cube& out) const // NOTE: we're assuming that the cube has already been set to the correct size; // this is done by either the Cube contructor or operator=() - if(is_same_type::yes) { out.ones(); } - else if(is_same_type::yes) { out.zeros(); } - else if(is_same_type::yes) { out.randu(); } - else if(is_same_type::yes) { out.randn(); } + if(is_same_type::yes) { out.zeros(); } + else if(is_same_type::yes) { out.ones(); } } @@ -99,24 +106,9 @@ GenCube::apply_inplace_plus(Cube& out) const arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "addition"); - - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword i,j; - - for(i=0, j=1; j::yes) { - out_mem[i] += (*this).generate(); + arrayops::inplace_plus(out.memptr(), eT(1), out.n_elem); } } @@ -132,24 +124,9 @@ GenCube::apply_inplace_minus(Cube& out) const arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "subtraction"); - - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword i,j; - - for(i=0, j=1; j::yes) { - const eT tmp_i = (*this).generate(); - const eT tmp_j = (*this).generate(); - - out_mem[i] -= tmp_i; - out_mem[j] -= tmp_j; - } - - if(i < n_elem) - { - out_mem[i] -= (*this).generate(); + arrayops::inplace_minus(out.memptr(), eT(1), out.n_elem); } } @@ -165,24 +142,10 @@ GenCube::apply_inplace_schur(Cube& out) const arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "element-wise multiplication"); - - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword i,j; - - for(i=0, j=1; j::yes) { - out_mem[i] *= (*this).generate(); + arrayops::inplace_mul(out.memptr(), eT(0), out.n_elem); + // NOTE: not using arrayops::fill_zeros(), as 'out' may have NaN elements } } @@ -198,24 +161,9 @@ GenCube::apply_inplace_div(Cube& out) const arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "element-wise division"); - - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword i,j; - - for(i=0, j=1; j::yes) { - out_mem[i] /= (*this).generate(); + arrayops::inplace_div(out.memptr(), eT(0), out.n_elem); } } @@ -231,10 +179,8 @@ GenCube::apply(subview_cube& out) const // NOTE: we're assuming that the subcube has the same dimensions as the GenCube object // this is checked by subview_cube::operator=() - if(is_same_type::yes) { out.ones(); } - else if(is_same_type::yes) { out.zeros(); } - else if(is_same_type::yes) { out.randu(); } - else if(is_same_type::yes) { out.randn(); } + if(is_same_type::yes) { out.zeros(); } + else if(is_same_type::yes) { out.ones(); } } diff --git a/Include/armadillo/armadillo_bits/GenSpecialiser.hpp b/Include/armadillo/armadillo_bits/GenSpecialiser.hpp deleted file mode 100644 index 23481a2b6..000000000 --- a/Include/armadillo/armadillo_bits/GenSpecialiser.hpp +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// -// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) -// Copyright 2008-2016 National ICT Australia (NICTA) -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// ------------------------------------------------------------------------ - - -//! \addtogroup GenSpecialiser -//! @{ - - -template -struct GenSpecialiser - { - arma_inline elem_type generate() const { return elem_type(); } - }; - - -template -struct GenSpecialiser - { - arma_inline elem_type generate() const { return elem_type(0); } - }; - - -template -struct GenSpecialiser - { - arma_inline elem_type generate() const { return elem_type(1); } - }; - - -template -struct GenSpecialiser - { - arma_inline elem_type generate() const { return elem_type(arma_rng::randu()); } - }; - - -template -struct GenSpecialiser - { - arma_inline elem_type generate() const { return elem_type(arma_rng::randn()); } - }; - - -//! @} diff --git a/Include/armadillo/armadillo_bits/Gen_bones.hpp b/Include/armadillo/armadillo_bits/Gen_bones.hpp index 9112acd6a..172e5b9c2 100644 --- a/Include/armadillo/armadillo_bits/Gen_bones.hpp +++ b/Include/armadillo/armadillo_bits/Gen_bones.hpp @@ -20,11 +20,10 @@ //! @{ -//! support class for generator functions (eg. zeros, randu, randn, ...) +//! support class for generator functions (zeros, ones, eye) template class Gen : public Base< typename T1::elem_type, Gen > - , public GenSpecialiser::yes, is_same_type::yes, is_same_type::yes, is_same_type::yes> { public: @@ -32,7 +31,7 @@ class Gen typedef typename get_pod_type::result pod_type; static constexpr bool use_at = (is_same_type::value); - static constexpr bool is_simple = (is_same_type::value) || (is_same_type::value); + static constexpr bool is_simple = (is_same_type::value) || (is_same_type::value); static constexpr bool is_row = T1::is_row; static constexpr bool is_col = T1::is_col; @@ -44,9 +43,9 @@ class Gen arma_inline Gen(const uword in_n_rows, const uword in_n_cols); arma_inline ~Gen(); - arma_inline elem_type operator[] (const uword ii) const; - arma_inline elem_type at (const uword row, const uword col) const; - arma_inline elem_type at_alt (const uword ii) const; + arma_inline elem_type operator[] (const uword ii) const; + arma_inline elem_type at (const uword r, const uword c) const; + arma_inline elem_type at_alt (const uword ii) const; inline void apply (Mat& out) const; inline void apply_inplace_plus (Mat& out) const; diff --git a/Include/armadillo/armadillo_bits/Gen_meat.hpp b/Include/armadillo/armadillo_bits/Gen_meat.hpp index e1a859008..96b8940c3 100644 --- a/Include/armadillo/armadillo_bits/Gen_meat.hpp +++ b/Include/armadillo/armadillo_bits/Gen_meat.hpp @@ -48,14 +48,11 @@ Gen::operator[](const uword ii) const { typedef typename T1::elem_type eT; - if(is_same_type::yes) - { - return ((ii % n_rows) == (ii / n_rows)) ? eT(1) : eT(0); - } - else - { - return (*this).generate(); - } + if(is_same_type::yes) { return eT(0); } + else if(is_same_type::yes) { return eT(1); } + else if(is_same_type::yes) { return ((ii % n_rows) == (ii / n_rows)) ? eT(1) : eT(0); } + + return eT(0); // prevent pedantic compiler warnings } @@ -63,18 +60,15 @@ Gen::operator[](const uword ii) const template arma_inline typename T1::elem_type -Gen::at(const uword row, const uword col) const +Gen::at(const uword r, const uword c) const { typedef typename T1::elem_type eT; - if(is_same_type::yes) - { - return (row == col) ? eT(1) : eT(0); - } - else - { - return (*this).generate(); - } + if(is_same_type::yes) { return eT(0); } + else if(is_same_type::yes) { return eT(1); } + else if(is_same_type::yes) { return (r == c) ? eT(1) : eT(0); } + + return eT(0); // prevent pedantic compiler warnings } @@ -99,11 +93,9 @@ Gen::apply(Mat& out) const // NOTE: we're assuming that the matrix has already been set to the correct size; // this is done by either the Mat contructor or operator=() - if(is_same_type::yes) { out.eye(); } + if(is_same_type::yes) { out.zeros(); } else if(is_same_type::yes) { out.ones(); } - else if(is_same_type::yes) { out.zeros(); } - else if(is_same_type::yes) { out.randu(); } - else if(is_same_type::yes) { out.randn(); } + else if(is_same_type::yes) { out.eye(); } } @@ -119,35 +111,16 @@ Gen::apply_inplace_plus(Mat& out) const typedef typename T1::elem_type eT; - - if(is_same_type::yes) + if(is_same_type::yes) { - const uword N = (std::min)(n_rows, n_cols); - - for(uword iq=0; iq < N; ++iq) - { - out.at(iq,iq) += eT(1); - } + arrayops::inplace_plus(out.memptr(), eT(1), out.n_elem); } else + if(is_same_type::yes) { - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword iq,jq; - for(iq=0, jq=1; jq < n_elem; iq+=2, jq+=2) - { - const eT tmp_i = (*this).generate(); - const eT tmp_j = (*this).generate(); - - out_mem[iq] += tmp_i; - out_mem[jq] += tmp_j; - } + const uword N = (std::min)(n_rows, n_cols); - if(iq < n_elem) - { - out_mem[iq] += (*this).generate(); - } + for(uword ii=0; ii < N; ++ii) { out.at(ii,ii) += eT(1); } } } @@ -165,35 +138,16 @@ Gen::apply_inplace_minus(Mat& out) const typedef typename T1::elem_type eT; - - if(is_same_type::yes) + if(is_same_type::yes) { - const uword N = (std::min)(n_rows, n_cols); - - for(uword iq=0; iq < N; ++iq) - { - out.at(iq,iq) -= eT(1); - } + arrayops::inplace_minus(out.memptr(), eT(1), out.n_elem); } else + if(is_same_type::yes) { - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword iq,jq; - for(iq=0, jq=1; jq < n_elem; iq+=2, jq+=2) - { - const eT tmp_i = (*this).generate(); - const eT tmp_j = (*this).generate(); - - out_mem[iq] -= tmp_i; - out_mem[jq] -= tmp_j; - } + const uword N = (std::min)(n_rows, n_cols); - if(iq < n_elem) - { - out_mem[iq] -= (*this).generate(); - } + for(uword ii=0; ii < N; ++ii) { out.at(ii,ii) -= eT(1); } } } @@ -211,35 +165,18 @@ Gen::apply_inplace_schur(Mat& out) const typedef typename T1::elem_type eT; - - if(is_same_type::yes) + if(is_same_type::yes) { - const uword N = (std::min)(n_rows, n_cols); - - for(uword iq=0; iq < N; ++iq) - { - for(uword row=0; row < iq; ++row) { out.at(row,iq) = eT(0); } - for(uword row=iq+1; row < n_rows; ++row) { out.at(row,iq) = eT(0); } - } + arrayops::inplace_mul(out.memptr(), eT(0), out.n_elem); + // NOTE: not using arrayops::fill_zeros(), as 'out' may have NaN elements } else + if(is_same_type::yes) { - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword iq,jq; - for(iq=0, jq=1; jq < n_elem; iq+=2, jq+=2) - { - const eT tmp_i = (*this).generate(); - const eT tmp_j = (*this).generate(); - - out_mem[iq] *= tmp_i; - out_mem[jq] *= tmp_j; - } - - if(iq < n_elem) + for(uword c=0; c < n_cols; ++c) + for(uword r=0; r < n_rows; ++r) { - out_mem[iq] *= (*this).generate(); + if(r != c) { out.at(r,c) *= eT(0); } } } } @@ -258,37 +195,17 @@ Gen::apply_inplace_div(Mat& out) const typedef typename T1::elem_type eT; - - if(is_same_type::yes) + if(is_same_type::yes) { - const uword N = (std::min)(n_rows, n_cols); - - for(uword iq=0; iq < N; ++iq) - { - const eT zero = eT(0); - - for(uword row=0; row < iq; ++row) { out.at(row,iq) /= zero; } - for(uword row=iq+1; row < n_rows; ++row) { out.at(row,iq) /= zero; } - } + arrayops::inplace_div(out.memptr(), eT(0), out.n_elem); } else + if(is_same_type::yes) { - eT* out_mem = out.memptr(); - const uword n_elem = out.n_elem; - - uword iq,jq; - for(iq=0, jq=1; jq < n_elem; iq+=2, jq+=2) - { - const eT tmp_i = (*this).generate(); - const eT tmp_j = (*this).generate(); - - out_mem[iq] /= tmp_i; - out_mem[jq] /= tmp_j; - } - - if(iq < n_elem) + for(uword c=0; c < n_cols; ++c) + for(uword r=0; r < n_rows; ++r) { - out_mem[iq] /= (*this).generate(); + if(r != c) { out.at(r,c) /= eT(0); } } } } @@ -305,11 +222,9 @@ Gen::apply(subview& out) const // NOTE: we're assuming that the submatrix has the same dimensions as the Gen object // this is checked by subview::operator=() - if(is_same_type::yes) { out.eye(); } + if(is_same_type::yes) { out.zeros(); } else if(is_same_type::yes) { out.ones(); } - else if(is_same_type::yes) { out.zeros(); } - else if(is_same_type::yes) { out.randu(); } - else if(is_same_type::yes) { out.randn(); } + else if(is_same_type::yes) { out.eye(); } } diff --git a/Include/armadillo/armadillo_bits/MapMat_meat.hpp b/Include/armadillo/armadillo_bits/MapMat_meat.hpp index 6dea74ad2..748bf5dd3 100644 --- a/Include/armadillo/armadillo_bits/MapMat_meat.hpp +++ b/Include/armadillo/armadillo_bits/MapMat_meat.hpp @@ -198,6 +198,8 @@ MapMat::operator=(MapMat&& x) { arma_extra_debug_sigprint(); + if(this == &x) { return; } + reset(); if(map_ptr) { delete map_ptr; } @@ -1184,11 +1186,9 @@ SpMat_MapMat_val::operator=(const eT in_val) } #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) { - s_parent.cache_mutex.lock(); + const std::lock_guard lock(s_parent.cache_mutex); (*this).set(in_val); - - s_parent.cache_mutex.unlock(); } #else { @@ -1219,11 +1219,9 @@ SpMat_MapMat_val::operator+=(const eT in_val) } #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) { - s_parent.cache_mutex.lock(); + const std::lock_guard lock(s_parent.cache_mutex); (*this).add(in_val); - - s_parent.cache_mutex.unlock(); } #else { @@ -1254,11 +1252,9 @@ SpMat_MapMat_val::operator-=(const eT in_val) } #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) { - s_parent.cache_mutex.lock(); + const std::lock_guard lock(s_parent.cache_mutex); (*this).sub(in_val); - - s_parent.cache_mutex.unlock(); } #else { @@ -1287,11 +1283,9 @@ SpMat_MapMat_val::operator*=(const eT in_val) } #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) { - s_parent.cache_mutex.lock(); + const std::lock_guard lock(s_parent.cache_mutex); (*this).mul(in_val); - - s_parent.cache_mutex.unlock(); } #else { @@ -1320,11 +1314,9 @@ SpMat_MapMat_val::operator/=(const eT in_val) } #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) { - s_parent.cache_mutex.lock(); + const std::lock_guard lock(s_parent.cache_mutex); (*this).div(in_val); - - s_parent.cache_mutex.unlock(); } #else { diff --git a/Include/armadillo/armadillo_bits/Mat_bones.hpp b/Include/armadillo/armadillo_bits/Mat_bones.hpp index 824000b79..f7cc6f32b 100644 --- a/Include/armadillo/armadillo_bits/Mat_bones.hpp +++ b/Include/armadillo/armadillo_bits/Mat_bones.hpp @@ -188,8 +188,8 @@ class Mat : public Base< eT, Mat > inline Mat& operator/=(const spdiagview& X); - arma_cold inline mat_injector operator<<(const eT val); - arma_cold inline mat_injector operator<<(const injector_end_of_row<>& x); + arma_deprecated inline mat_injector operator<<(const eT val); + arma_deprecated inline mat_injector operator<<(const injector_end_of_row<>& x); arma_inline subview_row row(const uword row_num); @@ -308,8 +308,11 @@ class Mat : public Base< eT, Mat > template inline void shed_rows(const Base& indices); template inline void shed_cols(const Base& indices); - inline void insert_rows(const uword row_num, const uword N, const bool set_to_zero = true); - inline void insert_cols(const uword col_num, const uword N, const bool set_to_zero = true); + arma_deprecated inline void insert_rows(const uword row_num, const uword N, const bool set_to_zero); + arma_deprecated inline void insert_cols(const uword col_num, const uword N, const bool set_to_zero); + + inline void insert_rows(const uword row_num, const uword N); + inline void insert_cols(const uword col_num, const uword N); template inline void insert_rows(const uword row_num, const Base& X); template inline void insert_cols(const uword col_num, const Base& X); @@ -745,7 +748,8 @@ class Mat : public Base< eT, Mat > inline void swap(Mat& B); - inline void steal_mem(Mat& X); //!< don't use this unless you're writing code internal to Armadillo + inline void steal_mem(Mat& X); //!< don't use this unless you're writing code internal to Armadillo + inline void steal_mem(Mat& X, const bool is_move); //!< don't use this unless you're writing code internal to Armadillo inline void steal_mem_col(Mat& X, const uword max_n_rows); diff --git a/Include/armadillo/armadillo_bits/Mat_meat.hpp b/Include/armadillo/armadillo_bits/Mat_meat.hpp index b1873fbe5..f3f1cba29 100644 --- a/Include/armadillo/armadillo_bits/Mat_meat.hpp +++ b/Include/armadillo/armadillo_bits/Mat_meat.hpp @@ -72,12 +72,11 @@ Mat::Mat(const uword in_n_rows, const uword in_n_cols) init_cold(); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Mat::constructor: zeroing memory"); arrayops::fill_zeros(memptr(), n_elem); } - #endif } @@ -97,12 +96,11 @@ Mat::Mat(const SizeMat& s) init_cold(); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Mat::constructor: zeroing memory"); arrayops::fill_zeros(memptr(), n_elem); } - #endif } @@ -353,7 +351,11 @@ Mat::init_warm(uword in_n_rows, uword in_n_cols) const uhword t_vec_state = vec_state; const uhword t_mem_state = mem_state; - arma_debug_set_error( err_state, err_msg, (t_mem_state == 3), "Mat::init(): size is fixed and hence cannot be changed" ); + const char* error_message_1 = "Mat::init(): size is fixed and hence cannot be changed"; + const char* error_message_2 = "Mat::init(): requested size is not compatible with column vector layout"; + const char* error_message_3 = "Mat::init(): requested size is not compatible with row vector layout"; + + arma_debug_set_error( err_state, err_msg, (t_mem_state == 3), error_message_1 ); if(t_vec_state > 0) { @@ -364,17 +366,17 @@ Mat::init_warm(uword in_n_rows, uword in_n_cols) } else { - if(t_vec_state == 1) { arma_debug_set_error( err_state, err_msg, (in_n_cols != 1), "Mat::init(): requested size is not compatible with column vector layout" ); } - if(t_vec_state == 2) { arma_debug_set_error( err_state, err_msg, (in_n_rows != 1), "Mat::init(): requested size is not compatible with row vector layout" ); } + if(t_vec_state == 1) { arma_debug_set_error( err_state, err_msg, (in_n_cols != 1), error_message_2 ); } + if(t_vec_state == 2) { arma_debug_set_error( err_state, err_msg, (in_n_rows != 1), error_message_3 ); } } } // ensure that n_elem can hold the result of (n_rows * n_cols) #if defined(ARMA_64BIT_WORD) - const char* error_message = "Mat::init(): requested size is too large"; + const char* error_message_4 = "Mat::init(): requested size is too large"; #else - const char* error_message = "Mat::init(): requested size is too large; suggest to enable ARMA_64BIT_WORD"; + const char* error_message_4 = "Mat::init(): requested size is too large; suggest to enable ARMA_64BIT_WORD"; #endif arma_debug_set_error @@ -386,7 +388,7 @@ Mat::init_warm(uword in_n_rows, uword in_n_cols) ? ( (double(in_n_rows) * double(in_n_cols)) > double(ARMA_MAX_UWORD) ) : false ), - error_message + error_message_4 ); arma_debug_check(err_state, err_msg); @@ -794,15 +796,7 @@ Mat::operator=(Mat&& X) { arma_extra_debug_sigprint(arma_str::format("this = %x X = %x") % this % &X); - (*this).steal_mem(X); - - if( (X.mem_state == 0) && (X.n_alloc <= arma_config::mat_prealloc) && (this != &X) ) - { - access::rw(X.n_rows) = 0; - access::rw(X.n_cols) = 0; - access::rw(X.n_elem) = 0; - access::rw(X.mem) = nullptr; - } + (*this).steal_mem(X, true); return *this; } @@ -939,7 +933,7 @@ Mat::init(const std::initializer_list& list) set_size(1, N); - arrayops::copy( memptr(), list.begin(), N ); + if(N > 0) { arrayops::copy( memptr(), list.begin(), N ); } } @@ -1199,6 +1193,18 @@ Mat::steal_mem(Mat& x) { arma_extra_debug_sigprint(); + (*this).steal_mem(x, false); + } + + + +template +inline +void +Mat::steal_mem(Mat& x, const bool is_move) + { + arma_extra_debug_sigprint(); + if(this == &x) { return; } const uword x_n_rows = x.n_rows; @@ -1213,8 +1219,10 @@ Mat::steal_mem(Mat& x) const bool layout_ok = (t_vec_state == x_vec_state) || ((t_vec_state == 1) && (x_n_cols == 1)) || ((t_vec_state == 2) && (x_n_rows == 1)); - if( layout_ok && (t_mem_state <= 1) && ((x_n_alloc > arma_config::mat_prealloc) || (x_mem_state == 1)) ) + if( layout_ok && (t_mem_state <= 1) && ( (x_n_alloc > arma_config::mat_prealloc) || (x_mem_state == 1) || (is_move && (x_mem_state == 2)) ) ) { + arma_extra_debug_print("Mat::steal_mem(): stealing memory"); + reset(); access::rw(n_rows) = x_n_rows; @@ -1224,8 +1232,8 @@ Mat::steal_mem(Mat& x) access::rw(mem_state) = x_mem_state; access::rw(mem) = x.mem; - access::rw(x.n_rows) = 0; - access::rw(x.n_cols) = 0; + access::rw(x.n_rows) = (x_vec_state == 2) ? 1 : 0; + access::rw(x.n_cols) = (x_vec_state == 1) ? 1 : 0; access::rw(x.n_elem) = 0; access::rw(x.n_alloc) = 0; access::rw(x.mem_state) = 0; @@ -1233,7 +1241,17 @@ Mat::steal_mem(Mat& x) } else { + arma_extra_debug_print("Mat::steal_mem(): copying memory"); + (*this).operator=(x); + + if( (is_move) && (x_mem_state == 0) && (x_n_alloc <= arma_config::mat_prealloc) ) + { + access::rw(x.n_rows) = (x_vec_state == 2) ? 1 : 0; + access::rw(x.n_cols) = (x_vec_state == 1) ? 1 : 0; + access::rw(x.n_elem) = 0; + access::rw(x.mem) = nullptr; + } } } @@ -2744,7 +2762,7 @@ Mat::operator%=(const SpBase& m) typename SpProxy::const_iterator_type it_end = p.end(); // We have to zero everything that isn't being used. - arrayops::inplace_set(memptr(), eT(0), (it.col() * n_rows) + it.row()); + arrayops::fill_zeros(memptr(), (it.col() * n_rows) + it.row()); while(it != it_end) { @@ -2758,7 +2776,7 @@ Mat::operator%=(const SpBase& m) ? (p.get_n_cols() * n_rows) : (it.col() * n_rows) + it.row(); - arrayops::inplace_set(memptr() + cur_loc + 1, eT(0), (next_loc - cur_loc - 1)); + arrayops::fill_zeros(memptr() + cur_loc + 1, (next_loc - cur_loc - 1)); } return *this; @@ -2818,8 +2836,12 @@ Mat::operator=(const SpSubview& X) (*this).zeros(X.n_rows, X.n_cols); + if(X.n_nonzero == 0) { return *this; } + if(X.n_rows == X.m.n_rows) { + X.m.sync(); + const uword sv_col_start = X.aux_col1; const uword sv_col_end = X.aux_col1 + X.n_cols - 1; @@ -2973,7 +2995,7 @@ Mat::operator/=(const spdiagview& X) template -arma_cold +arma_deprecated inline mat_injector< Mat > Mat::operator<<(const eT val) @@ -2984,7 +3006,7 @@ Mat::operator<<(const eT val) template -arma_cold +arma_deprecated inline mat_injector< Mat > Mat::operator<<(const injector_end_of_row<>& x) @@ -4521,15 +4543,28 @@ Mat::shed_cols(const Base& indices) -//! insert N rows at the specified row position, -//! optionally setting the elements of the inserted rows to zero template +arma_deprecated inline void Mat::insert_rows(const uword row_num, const uword N, const bool set_to_zero) { arma_extra_debug_sigprint(); + arma_ignore(set_to_zero); + + (*this).insert_rows(row_num, N); + } + + + +template +inline +void +Mat::insert_rows(const uword row_num, const uword N) + { + arma_extra_debug_sigprint(); + const uword t_n_rows = n_rows; const uword t_n_cols = n_cols; @@ -4539,40 +4574,49 @@ Mat::insert_rows(const uword row_num, const uword N, const bool set_to_zero) // insertion at row_num == n_rows is in effect an append operation arma_debug_check_bounds( (row_num > t_n_rows), "Mat::insert_rows(): index out of bounds" ); - if(N > 0) + if(N == 0) { return; } + + Mat out(t_n_rows + N, t_n_cols, arma_nozeros_indicator()); + + if(A_n_rows > 0) { - Mat out(t_n_rows + N, t_n_cols, arma_nozeros_indicator()); - - if(A_n_rows > 0) - { - out.rows(0, A_n_rows-1) = rows(0, A_n_rows-1); - } - - if(B_n_rows > 0) - { - out.rows(row_num + N, t_n_rows + N - 1) = rows(row_num, t_n_rows-1); - } - - if(set_to_zero) - { - out.rows(row_num, row_num + N - 1).zeros(); - } - - steal_mem(out); + out.rows(0, A_n_rows-1) = rows(0, A_n_rows-1); } + + if(B_n_rows > 0) + { + out.rows(row_num + N, t_n_rows + N - 1) = rows(row_num, t_n_rows-1); + } + + out.rows(row_num, row_num + N - 1).zeros(); + + steal_mem(out); } -//! insert N columns at the specified column position, -//! optionally setting the elements of the inserted columns to zero template +arma_deprecated inline void Mat::insert_cols(const uword col_num, const uword N, const bool set_to_zero) { arma_extra_debug_sigprint(); + arma_ignore(set_to_zero); + + (*this).insert_cols(col_num, N); + } + + + +template +inline +void +Mat::insert_cols(const uword col_num, const uword N) + { + arma_extra_debug_sigprint(); + const uword t_n_rows = n_rows; const uword t_n_cols = n_cols; @@ -4582,27 +4626,23 @@ Mat::insert_cols(const uword col_num, const uword N, const bool set_to_zero) // insertion at col_num == n_cols is in effect an append operation arma_debug_check_bounds( (col_num > t_n_cols), "Mat::insert_cols(): index out of bounds" ); - if(N > 0) + if(N == 0) { return; } + + Mat out(t_n_rows, t_n_cols + N, arma_nozeros_indicator()); + + if(A_n_cols > 0) { - Mat out(t_n_rows, t_n_cols + N, arma_nozeros_indicator()); - - if(A_n_cols > 0) - { - out.cols(0, A_n_cols-1) = cols(0, A_n_cols-1); - } - - if(B_n_cols > 0) - { - out.cols(col_num + N, t_n_cols + N - 1) = cols(col_num, t_n_cols-1); - } - - if(set_to_zero) - { - out.cols(col_num, col_num + N - 1).zeros(); - } - - steal_mem(out); + out.cols(0, A_n_cols-1) = cols(0, A_n_cols-1); + } + + if(B_n_cols > 0) + { + out.cols(col_num + N, t_n_cols + N - 1) = cols(col_num, t_n_cols-1); } + + out.cols(col_num, col_num + N - 1).zeros(); + + steal_mem(out); } @@ -4632,6 +4672,9 @@ Mat::insert_rows(const uword row_num, const Base& X) bool err_state = false; char* err_msg = nullptr; + const char* error_message_1 = "Mat::insert_rows(): index out of bounds"; + const char* error_message_2 = "Mat::insert_rows(): given object has an incompatible number of columns"; + // insertion at row_num == n_rows is in effect an append operation arma_debug_set_error @@ -4639,7 +4682,7 @@ Mat::insert_rows(const uword row_num, const Base& X) err_state, err_msg, (row_num > t_n_rows), - "Mat::insert_rows(): index out of bounds" + error_message_1 ); arma_debug_set_error @@ -4647,7 +4690,7 @@ Mat::insert_rows(const uword row_num, const Base& X) err_state, err_msg, ( (C_n_cols != t_n_cols) && ( (t_n_rows > 0) || (t_n_cols > 0) ) && ( (C_n_rows > 0) || (C_n_cols > 0) ) ), - "Mat::insert_rows(): given object has an incompatible number of columns" + error_message_2 ); arma_debug_check_bounds(err_state, err_msg); @@ -4705,6 +4748,9 @@ Mat::insert_cols(const uword col_num, const Base& X) bool err_state = false; char* err_msg = nullptr; + const char* error_message_1 = "Mat::insert_cols(): index out of bounds"; + const char* error_message_2 = "Mat::insert_cols(): given object has an incompatible number of rows"; + // insertion at col_num == n_cols is in effect an append operation arma_debug_set_error @@ -4712,7 +4758,7 @@ Mat::insert_cols(const uword col_num, const Base& X) err_state, err_msg, (col_num > t_n_cols), - "Mat::insert_cols(): index out of bounds" + error_message_1 ); arma_debug_set_error @@ -4720,7 +4766,7 @@ Mat::insert_cols(const uword col_num, const Base& X) err_state, err_msg, ( (C_n_rows != t_n_rows) && ( (t_n_rows > 0) || (t_n_cols > 0) ) && ( (C_n_rows > 0) || (C_n_cols > 0) ) ), - "Mat::insert_cols(): given object has an incompatible number of rows" + error_message_2 ); arma_debug_check_bounds(err_state, err_msg); @@ -9183,7 +9229,7 @@ Mat::fixed::fixed() { arma_extra_debug_sigprint_this(this); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Mat::fixed::constructor: zeroing memory"); @@ -9191,7 +9237,6 @@ Mat::fixed::fixed() arrayops::inplace_set_fixed( mem_use, eT(0) ); } - #endif } @@ -10011,17 +10056,14 @@ Mat_aux::set_real(Mat< std::complex >& out, const Base& X) const uword N = out.n_elem; - for(uword i=0; i( A[i], out_mem[i].imag() ); - } + for(uword i=0; i( P.at(row,col), (*out_mem).imag() ); + (*out_mem).real(P.at(row,col)); out_mem++; } } @@ -10055,17 +10097,14 @@ Mat_aux::set_imag(Mat< std::complex >& out, const Base& X) const uword N = out.n_elem; - for(uword i=0; i( out_mem[i].real(), A[i] ); - } + for(uword i=0; i( (*out_mem).real(), P.at(row,col) ); + (*out_mem).imag(P.at(row,col)); out_mem++; } } diff --git a/Include/armadillo/armadillo_bits/Proxy.hpp b/Include/armadillo/armadillo_bits/Proxy.hpp index 335489c41..cc1b75127 100644 --- a/Include/armadillo/armadillo_bits/Proxy.hpp +++ b/Include/armadillo/armadillo_bits/Proxy.hpp @@ -334,101 +334,7 @@ struct Proxy< Gen > template constexpr bool has_overlap(const subview&) const { return false; } - arma_inline bool is_aligned() const { return Gen::is_simple; } - }; - - - -template -struct Proxy< Gen > - { - typedef typename T1::elem_type elem_type; - typedef typename get_pod_type::result pod_type; - typedef Mat stored_type; - typedef const elem_type* ea_type; - typedef const Mat& aligned_ea_type; - - static constexpr bool use_at = false; - static constexpr bool use_mp = false; - static constexpr bool has_subview = false; - - static constexpr bool is_row = Gen::is_row; - static constexpr bool is_col = Gen::is_col; - static constexpr bool is_xvec = Gen::is_xvec; - - arma_aligned const Mat Q; - - inline explicit Proxy(const Gen& A) - : Q(A) - { - arma_extra_debug_sigprint(); - } - - arma_inline uword get_n_rows() const { return (is_row ? 1 : Q.n_rows); } - arma_inline uword get_n_cols() const { return (is_col ? 1 : Q.n_cols); } - arma_inline uword get_n_elem() const { return (is_row ? 1 : Q.n_rows) * (is_col ? 1 : Q.n_cols); } - - arma_inline elem_type operator[] (const uword i) const { return Q[i]; } - arma_inline elem_type at (const uword r, const uword c) const { return Q.at(r, c); } - arma_inline elem_type at_alt (const uword i) const { return Q.at_alt(i); } - - arma_inline ea_type get_ea() const { return Q.memptr(); } - arma_inline aligned_ea_type get_aligned_ea() const { return Q; } - - template - constexpr bool is_alias(const Mat&) const { return false; } - - template - constexpr bool has_overlap(const subview&) const { return false; } - - arma_inline bool is_aligned() const { return memory::is_aligned(Q.memptr()); } - }; - - - -template -struct Proxy< Gen > - { - typedef typename T1::elem_type elem_type; - typedef typename get_pod_type::result pod_type; - typedef Mat stored_type; - typedef const elem_type* ea_type; - typedef const Mat& aligned_ea_type; - - static constexpr bool use_at = false; - static constexpr bool use_mp = false; - static constexpr bool has_subview = false; - - static constexpr bool is_row = Gen::is_row; - static constexpr bool is_col = Gen::is_col; - static constexpr bool is_xvec = Gen::is_xvec; - - arma_aligned const Mat Q; - - inline explicit Proxy(const Gen& A) - : Q(A) - { - arma_extra_debug_sigprint(); - } - - arma_inline uword get_n_rows() const { return (is_row ? 1 : Q.n_rows); } - arma_inline uword get_n_cols() const { return (is_col ? 1 : Q.n_cols); } - arma_inline uword get_n_elem() const { return (is_row ? 1 : Q.n_rows) * (is_col ? 1 : Q.n_cols); } - - arma_inline elem_type operator[] (const uword i) const { return Q[i]; } - arma_inline elem_type at (const uword r, const uword c) const { return Q.at(r, c); } - arma_inline elem_type at_alt (const uword i) const { return Q.at_alt(i); } - - arma_inline ea_type get_ea() const { return Q.memptr(); } - arma_inline aligned_ea_type get_aligned_ea() const { return Q; } - - template - constexpr bool is_alias(const Mat&) const { return false; } - - template - constexpr bool has_overlap(const subview&) const { return false; } - - arma_inline bool is_aligned() const { return memory::is_aligned(Q.memptr()); } + constexpr bool is_aligned() const { return Gen::is_simple; } }; diff --git a/Include/armadillo/armadillo_bits/ProxyCube.hpp b/Include/armadillo/armadillo_bits/ProxyCube.hpp index d56ece8ce..ef6392843 100644 --- a/Include/armadillo/armadillo_bits/ProxyCube.hpp +++ b/Include/armadillo/armadillo_bits/ProxyCube.hpp @@ -120,97 +120,7 @@ struct ProxyCube< GenCube > template constexpr bool has_overlap(const subview_cube&) const { return false; } - arma_inline bool is_aligned() const { return GenCube::is_simple; } - }; - - - -template -struct ProxyCube< GenCube > - { - typedef eT elem_type; - typedef typename get_pod_type::result pod_type; - typedef Cube stored_type; - typedef const eT* ea_type; - typedef const Cube& aligned_ea_type; - - static constexpr bool use_at = false; - static constexpr bool use_mp = false; - static constexpr bool has_subview = false; - - arma_aligned const Cube Q; - - inline explicit ProxyCube(const GenCube& A) - : Q(A) - { - arma_extra_debug_sigprint(); - } - - arma_inline uword get_n_rows() const { return Q.n_rows; } - arma_inline uword get_n_cols() const { return Q.n_cols; } - arma_inline uword get_n_elem_slice() const { return Q.n_elem_slice; } - arma_inline uword get_n_slices() const { return Q.n_slices; } - arma_inline uword get_n_elem() const { return Q.n_elem; } - - arma_inline elem_type operator[] (const uword i) const { return Q[i]; } - arma_inline elem_type at (const uword r, const uword c, const uword s) const { return Q.at(r, c, s); } - arma_inline elem_type at_alt (const uword i) const { return Q.at_alt(i); } - - arma_inline ea_type get_ea() const { return Q.memptr(); } - arma_inline aligned_ea_type get_aligned_ea() const { return Q; } - - template - constexpr bool is_alias(const Cube&) const { return false; } - - template - constexpr bool has_overlap(const subview_cube&) const { return false; } - - arma_inline bool is_aligned() const { return memory::is_aligned(Q.memptr()); } - }; - - - -template -struct ProxyCube< GenCube > - { - typedef eT elem_type; - typedef typename get_pod_type::result pod_type; - typedef Cube stored_type; - typedef const eT* ea_type; - typedef const Cube& aligned_ea_type; - - static constexpr bool use_at = false; - static constexpr bool use_mp = false; - static constexpr bool has_subview = false; - - arma_aligned const Cube Q; - - inline explicit ProxyCube(const GenCube& A) - : Q(A) - { - arma_extra_debug_sigprint(); - } - - arma_inline uword get_n_rows() const { return Q.n_rows; } - arma_inline uword get_n_cols() const { return Q.n_cols; } - arma_inline uword get_n_elem_slice() const { return Q.n_elem_slice; } - arma_inline uword get_n_slices() const { return Q.n_slices; } - arma_inline uword get_n_elem() const { return Q.n_elem; } - - arma_inline elem_type operator[] (const uword i) const { return Q[i]; } - arma_inline elem_type at (const uword r, const uword c, const uword s) const { return Q.at(r, c, s); } - arma_inline elem_type at_alt (const uword i) const { return Q.at_alt(i); } - - arma_inline ea_type get_ea() const { return Q.memptr(); } - arma_inline aligned_ea_type get_aligned_ea() const { return Q; } - - template - constexpr bool is_alias(const Cube&) const { return false; } - - template - constexpr bool has_overlap(const subview_cube&) const { return false; } - - arma_inline bool is_aligned() const { return memory::is_aligned(Q.memptr()); } + constexpr bool is_aligned() const { return GenCube::is_simple; } }; diff --git a/Include/armadillo/armadillo_bits/Row_bones.hpp b/Include/armadillo/armadillo_bits/Row_bones.hpp index 8bccc98ca..1ce03347a 100644 --- a/Include/armadillo/armadillo_bits/Row_bones.hpp +++ b/Include/armadillo/armadillo_bits/Row_bones.hpp @@ -88,7 +88,7 @@ class Row : public Mat inline Row(const subview_cube& X); inline Row& operator=(const subview_cube& X); - arma_cold inline mat_injector operator<<(const eT val); + arma_deprecated inline mat_injector operator<<(const eT val); arma_inline arma_warn_unused const Op,op_htrans> t() const; arma_inline arma_warn_unused const Op,op_htrans> ht() const; @@ -138,7 +138,9 @@ class Row : public Mat template inline void shed_cols(const Base& indices); - inline void insert_cols(const uword col_num, const uword N, const bool set_to_zero = true); + arma_deprecated inline void insert_cols(const uword col_num, const uword N, const bool set_to_zero); + inline void insert_cols(const uword col_num, const uword N); + template inline void insert_cols(const uword col_num, const Base& X); diff --git a/Include/armadillo/armadillo_bits/Row_meat.hpp b/Include/armadillo/armadillo_bits/Row_meat.hpp index 7acf7d52c..f671a6e4f 100644 --- a/Include/armadillo/armadillo_bits/Row_meat.hpp +++ b/Include/armadillo/armadillo_bits/Row_meat.hpp @@ -51,12 +51,11 @@ Row::Row(const uword in_n_elem) { arma_extra_debug_sigprint(); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Row::constructor: zeroing memory"); arrayops::fill_zeros(Mat::memptr(), Mat::n_elem); } - #endif } @@ -70,12 +69,11 @@ Row::Row(const uword in_n_rows, const uword in_n_cols) Mat::init_warm(in_n_rows, in_n_cols); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Row::constructor: zeroing memory"); arrayops::fill_zeros(Mat::memptr(), Mat::n_elem); } - #endif } @@ -89,12 +87,11 @@ Row::Row(const SizeMat& s) Mat::init_warm(s.n_rows, s.n_cols); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Row::constructor: zeroing memory"); arrayops::fill_zeros(Mat::memptr(), Mat::n_elem); } - #endif } @@ -314,10 +311,9 @@ Row::Row(const std::vector& x) { arma_extra_debug_sigprint_this(this); - if(x.size() > 0) - { - arrayops::copy( Mat::memptr(), &(x[0]), uword(x.size()) ); - } + const uword N = uword(x.size()); + + if(N > 0) { arrayops::copy( Mat::memptr(), &(x[0]), N ); } } @@ -330,12 +326,11 @@ Row::operator=(const std::vector& x) { arma_extra_debug_sigprint(); - Mat::init_warm(1, uword(x.size())); + const uword N = uword(x.size()); - if(x.size() > 0) - { - arrayops::copy( Mat::memptr(), &(x[0]), uword(x.size()) ); - } + Mat::init_warm(1, N); + + if(N > 0) { arrayops::copy( Mat::memptr(), &(x[0]), N ); } return *this; } @@ -345,11 +340,13 @@ Row::operator=(const std::vector& x) template inline Row::Row(const std::initializer_list& list) - : Mat(arma_vec_indicator(), 2) + : Mat(arma_vec_indicator(), 1, uword(list.size()), 2) { - arma_extra_debug_sigprint(); + arma_extra_debug_sigprint_this(this); - (*this).operator=(list); + const uword N = uword(list.size()); + + if(N > 0) { arrayops::copy( Mat::memptr(), list.begin(), N ); } } @@ -361,14 +358,11 @@ Row::operator=(const std::initializer_list& list) { arma_extra_debug_sigprint(); - Mat tmp(list); + const uword N = uword(list.size()); - arma_debug_check( ((tmp.n_elem > 0) && (tmp.is_vec() == false)), "Mat::init(): requested size is not compatible with row vector layout" ); + Mat::init_warm(1, N); - access::rw(tmp.n_rows) = 1; - access::rw(tmp.n_cols) = tmp.n_elem; - - (*this).steal_mem(tmp); + if(N > 0) { arrayops::copy( Mat::memptr(), list.begin(), N ); } return *this; } @@ -424,15 +418,7 @@ Row::operator=(Row&& X) { arma_extra_debug_sigprint(arma_str::format("this = %x X = %x") % this % &X); - (*this).steal_mem(X); - - if( (X.mem_state == 0) && (X.n_alloc <= arma_config::mat_prealloc) && (this != &X) ) - { - access::rw(X.n_rows) = 1; - access::rw(X.n_cols) = 0; - access::rw(X.n_elem) = 0; - access::rw(X.mem) = nullptr; - } + (*this).steal_mem(X, true); return *this; } @@ -625,7 +611,7 @@ Row::operator=(const subview_cube& X) template inline -arma_cold +arma_deprecated mat_injector< Row > Row::operator<<(const eT val) { @@ -1068,15 +1054,28 @@ Row::shed_cols(const Base& indices) -//! insert N cols at the specified col position, -//! optionally setting the elements of the inserted cols to zero template +arma_deprecated inline void Row::insert_cols(const uword col_num, const uword N, const bool set_to_zero) { arma_extra_debug_sigprint(); + arma_ignore(set_to_zero); + + (*this).insert_cols(col_num, N); + } + + + +template +inline +void +Row::insert_cols(const uword col_num, const uword N) + { + arma_extra_debug_sigprint(); + const uword t_n_cols = Mat::n_cols; const uword A_n_cols = col_num; @@ -1085,30 +1084,26 @@ Row::insert_cols(const uword col_num, const uword N, const bool set_to_zero) // insertion at col_num == n_cols is in effect an append operation arma_debug_check_bounds( (col_num > t_n_cols), "Row::insert_cols(): index out of bounds" ); - if(N > 0) + if(N == 0) { return; } + + Row out(t_n_cols + N, arma_nozeros_indicator()); + + eT* out_mem = out.memptr(); + const eT* t_mem = (*this).memptr(); + + if(A_n_cols > 0) { - Row out(t_n_cols + N, arma_nozeros_indicator()); - - eT* out_mem = out.memptr(); - const eT* t_mem = (*this).memptr(); - - if(A_n_cols > 0) - { - arrayops::copy( out_mem, t_mem, A_n_cols ); - } - - if(B_n_cols > 0) - { - arrayops::copy( &(out_mem[col_num + N]), &(t_mem[col_num]), B_n_cols ); - } - - if(set_to_zero) - { - arrayops::inplace_set( &(out_mem[col_num]), eT(0), N ); - } - - Mat::steal_mem(out); + arrayops::copy( out_mem, t_mem, A_n_cols ); } + + if(B_n_cols > 0) + { + arrayops::copy( &(out_mem[col_num + N]), &(t_mem[col_num]), B_n_cols ); + } + + arrayops::fill_zeros( &(out_mem[col_num]), N ); + + Mat::steal_mem(out); } @@ -1236,7 +1231,7 @@ Row::fixed::fixed() { arma_extra_debug_sigprint_this(this); - #if (!defined(ARMA_DONT_ZERO_INIT)) + if(arma_config::zero_init) { arma_extra_debug_print("Row::fixed::constructor: zeroing memory"); @@ -1244,7 +1239,6 @@ Row::fixed::fixed() arrayops::inplace_set_fixed( mem_use, eT(0) ); } - #endif } diff --git a/Include/armadillo/armadillo_bits/SpMat_meat.hpp b/Include/armadillo/armadillo_bits/SpMat_meat.hpp index 644f1e5b9..2bb4d98ed 100644 --- a/Include/armadillo/armadillo_bits/SpMat_meat.hpp +++ b/Include/armadillo/armadillo_bits/SpMat_meat.hpp @@ -1355,8 +1355,8 @@ SpMat::operator=(const SpSubview& X) const uword sv_col_start = X.aux_col1; const uword sv_col_end = X.aux_col1 + X.n_cols - 1; - typename SpMat::const_col_iterator m_it = X.m.begin_col(sv_col_start); - typename SpMat::const_col_iterator m_it_end = X.m.end_col(sv_col_end); + typename SpMat::const_col_iterator m_it = X.m.begin_col_no_sync(sv_col_start); + typename SpMat::const_col_iterator m_it_end = X.m.end_col_no_sync(sv_col_end); uword count = 0; @@ -3842,10 +3842,7 @@ SpMat::resize(const uword in_rows, const uword in_cols) { arma_extra_debug_sigprint(); - if( (n_rows == in_rows) && (n_cols == in_cols) ) - { - return; - } + if( (n_rows == in_rows) && (n_cols == in_cols) ) { return; } if( (n_elem == 0) || (n_nonzero == 0) ) { @@ -4561,13 +4558,11 @@ SpMat::reset_cache() } #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) { - cache_mutex.lock(); + const std::lock_guard lock(cache_mutex); cache.reset(); sync_state = 0; - - cache_mutex.unlock(); } #else { @@ -5196,13 +5191,13 @@ SpMat::init(const SpMat& x) #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) if(x.sync_state == 1) { - x.cache_mutex.lock(); + const std::lock_guard lock(x.cache_mutex); + if(x.sync_state == 1) { (*this).init(x.cache); init_done = true; } - x.cache_mutex.unlock(); } #else if(x.sync_state == 1) @@ -5818,6 +5813,8 @@ SpMat::steal_mem(SpMat& x) if(layout_ok) { + arma_extra_debug_print("SpMat::steal_mem(): stealing memory"); + x.sync_csc(); steal_mem_simple(x); @@ -5828,6 +5825,8 @@ SpMat::steal_mem(SpMat& x) } else { + arma_extra_debug_print("SpMat::steal_mem(): copying memory"); + (*this).operator=(x); } } @@ -6800,11 +6799,9 @@ SpMat::sync_cache() const { if(sync_state == 0) { - cache_mutex.lock(); + const std::lock_guard lock(cache_mutex); sync_cache_simple(); - - cache_mutex.unlock(); } } #else @@ -6852,11 +6849,9 @@ SpMat::sync_csc() const #elif (!defined(ARMA_DONT_USE_STD_MUTEX)) if(sync_state == 1) { - cache_mutex.lock(); + const std::lock_guard lock(cache_mutex); sync_csc_simple(); - - cache_mutex.unlock(); } #else { diff --git a/Include/armadillo/armadillo_bits/SpProxy.hpp b/Include/armadillo/armadillo_bits/SpProxy.hpp index fcdc7d231..50adcc802 100644 --- a/Include/armadillo/armadillo_bits/SpProxy.hpp +++ b/Include/armadillo/armadillo_bits/SpProxy.hpp @@ -144,7 +144,7 @@ struct SpProxy< SpCol > } arma_inline uword get_n_rows() const { return Q.n_rows; } - arma_inline uword get_n_cols() const { return 1; } + constexpr uword get_n_cols() const { return 1; } arma_inline uword get_n_elem() const { return Q.n_elem; } arma_inline uword get_n_nonzero() const { return Q.n_nonzero; } @@ -195,7 +195,7 @@ struct SpProxy< SpRow > Q.sync(); } - arma_inline uword get_n_rows() const { return 1; } + constexpr uword get_n_rows() const { return 1; } arma_inline uword get_n_cols() const { return Q.n_cols; } arma_inline uword get_n_elem() const { return Q.n_elem; } arma_inline uword get_n_nonzero() const { return Q.n_nonzero; } @@ -300,7 +300,7 @@ struct SpProxy< SpSubview_col > } arma_inline uword get_n_rows() const { return Q.n_rows; } - arma_inline uword get_n_cols() const { return 1; } + constexpr uword get_n_cols() const { return 1; } arma_inline uword get_n_elem() const { return Q.n_elem; } arma_inline uword get_n_nonzero() const { return Q.n_nonzero; } @@ -402,7 +402,7 @@ struct SpProxy< SpSubview_row > Q.m.sync(); } - arma_inline uword get_n_rows() const { return 1; } + constexpr uword get_n_rows() const { return 1; } arma_inline uword get_n_cols() const { return Q.n_cols; } arma_inline uword get_n_elem() const { return Q.n_elem; } arma_inline uword get_n_nonzero() const { return Q.n_nonzero; } @@ -454,7 +454,7 @@ struct SpProxy< spdiagview > } arma_inline uword get_n_rows() const { return Q.n_rows; } - arma_inline uword get_n_cols() const { return 1; } + constexpr uword get_n_cols() const { return 1; } arma_inline uword get_n_elem() const { return Q.n_elem; } arma_inline uword get_n_nonzero() const { return Q.n_nonzero; } diff --git a/Include/armadillo/armadillo_bits/arma_config.hpp b/Include/armadillo/armadillo_bits/arma_config.hpp index b03962c2a..3bb7f9f24 100644 --- a/Include/armadillo/armadillo_bits/arma_config.hpp +++ b/Include/armadillo/armadillo_bits/arma_config.hpp @@ -209,6 +209,13 @@ struct arma_config #endif + #if defined(ARMA_DONT_ZERO_INIT) + static constexpr bool zero_init = false; + #else + static constexpr bool zero_init = true; + #endif + + static constexpr uword warn_level = (sword(ARMA_WARN_LEVEL) > 0) ? uword(ARMA_WARN_LEVEL) : 0; }; diff --git a/Include/armadillo/armadillo_bits/arma_forward.hpp b/Include/armadillo/armadillo_bits/arma_forward.hpp index a4dd28033..8b2f15d37 100644 --- a/Include/armadillo/armadillo_bits/arma_forward.hpp +++ b/Include/armadillo/armadillo_bits/arma_forward.hpp @@ -117,8 +117,6 @@ class op_rel_noteq; class gen_eye; class gen_ones; class gen_zeros; -class gen_randu; -class gen_randn; @@ -348,6 +346,7 @@ struct arma_nozeros_indicator : public arma_initmode_indicator {}; template struct injector_end_of_row {}; +// DEPRECATED: DO NOT USE IN NEW CODE static const injector_end_of_row<> endr = injector_end_of_row<>(); //!< endr indicates "end of row" when using the << operator; //!< similar conceptual meaning to std::endl diff --git a/Include/armadillo/armadillo_bits/arma_rng.hpp b/Include/armadillo/armadillo_bits/arma_rng.hpp index 9a6b90c75..08d77d263 100644 --- a/Include/armadillo/armadillo_bits/arma_rng.hpp +++ b/Include/armadillo/armadillo_bits/arma_rng.hpp @@ -345,6 +345,40 @@ struct arma_rng::randu } #endif } + + + inline + static + void + fill(eT* mem, const uword N, const double a, const double b) + { + #if defined(ARMA_RNG_ALT) + { + const double r = b - a; + + for(uword i=0; i < N; ++i) { mem[i] = eT( arma_rng_alt::randu_val() * r + a ); } + } + #elif defined(ARMA_USE_EXTERN_RNG) + { + std::uniform_real_distribution local_u_distr(a,b); + + for(uword i=0; i < N; ++i) { mem[i] = eT( local_u_distr(mt19937_64_instance) ); } + } + #else + { + if(N == uword(1)) { mem[0] = eT( arma_rng_cxx03::randu_val() * (b - a) + a ); return; } + + typedef typename std::mt19937_64::result_type local_seed_type; + + std::mt19937_64 local_engine; + std::uniform_real_distribution local_u_distr(a,b); + + local_engine.seed( local_seed_type(std::rand()) ); + + for(uword i=0; i < N; ++i) { mem[i] = eT( local_u_distr(local_engine) ); } + } + #endif + } }; @@ -438,6 +472,68 @@ struct arma_rng::randu< std::complex > } #endif } + + + inline + static + void + fill(std::complex* mem, const uword N, const double a, const double b) + { + #if defined(ARMA_RNG_ALT) + { + const double r = b - a; + + for(uword i=0; i < N; ++i) + { + const T tmp1 = T( arma_rng_alt::randu_val() * r + a ); + const T tmp2 = T( arma_rng_alt::randu_val() * r + a ); + + mem[i] = std::complex(tmp1, tmp2); + } + } + #elif defined(ARMA_USE_EXTERN_RNG) + { + std::uniform_real_distribution local_u_distr(a,b); + + for(uword i=0; i < N; ++i) + { + const T tmp1 = T( local_u_distr(mt19937_64_instance) ); + const T tmp2 = T( local_u_distr(mt19937_64_instance) ); + + mem[i] = std::complex(tmp1, tmp2); + } + } + #else + { + if(N == uword(1)) + { + const double r = b - a; + + const T tmp1 = T( arma_rng_cxx03::randu_val() * r + a); + const T tmp2 = T( arma_rng_cxx03::randu_val() * r + a); + + mem[0] = std::complex(tmp1, tmp2); + + return; + } + + typedef typename std::mt19937_64::result_type local_seed_type; + + std::mt19937_64 local_engine; + std::uniform_real_distribution local_u_distr(a,b); + + local_engine.seed( local_seed_type(std::rand()) ); + + for(uword i=0; i < N; ++i) + { + const T tmp1 = T( local_u_distr(local_engine) ); + const T tmp2 = T( local_u_distr(local_engine) ); + + mem[i] = std::complex(tmp1, tmp2); + } + } + #endif + } }; @@ -581,6 +677,24 @@ struct arma_rng::randn #endif } + + inline + static + void + fill(eT* mem, const uword N, const double mu, const double sd) + { + arma_rng::randn::fill(mem, N); + + if( (mu == double(0)) && (sd == double(1)) ) { return; } + + for(uword i=0; i > } #endif } + + + inline + static + void + fill(std::complex* mem, const uword N, const double mu, const double sd) + { + arma_rng::randn< std::complex >::fill(mem, N); + + if( (mu == double(0)) && (sd == double(1)) ) { return; } + + for(uword i=0; i& val = mem[i]; + + mem[i] = std::complex( ((val.real() * sd) + mu), ((val.imag() * sd) + mu) ); + } + } }; diff --git a/Include/armadillo/armadillo_bits/arma_version.hpp b/Include/armadillo/armadillo_bits/arma_version.hpp index 6a04e15f4..0f90b0b28 100644 --- a/Include/armadillo/armadillo_bits/arma_version.hpp +++ b/Include/armadillo/armadillo_bits/arma_version.hpp @@ -22,9 +22,9 @@ #define ARMA_VERSION_MAJOR 11 -#define ARMA_VERSION_MINOR 0 -#define ARMA_VERSION_PATCH 0 -#define ARMA_VERSION_NAME "Creme Brulee" +#define ARMA_VERSION_MINOR 4 +#define ARMA_VERSION_PATCH 2 +#define ARMA_VERSION_NAME "Ship of Theseus" diff --git a/Include/armadillo/armadillo_bits/arrayops_bones.hpp b/Include/armadillo/armadillo_bits/arrayops_bones.hpp index 2bdb31e04..0beec3ae1 100644 --- a/Include/armadillo/armadillo_bits/arrayops_bones.hpp +++ b/Include/armadillo/armadillo_bits/arrayops_bones.hpp @@ -28,10 +28,6 @@ class arrayops arma_inline static void copy(eT* dest, const eT* src, const uword n_elem); - template - arma_cold inline static void - copy_small(eT* dest, const eT* src, const uword n_elem); - template inline static void fill_zeros(eT* dest, const uword n_elem); @@ -144,11 +140,6 @@ class arrayops void inplace_set_base(eT* dest, const eT val, const uword n_elem); - template - arma_cold inline static - void - inplace_set_small(eT* dest, const eT val, const uword n_elem); - template arma_hot inline static void diff --git a/Include/armadillo/armadillo_bits/arrayops_meat.hpp b/Include/armadillo/armadillo_bits/arrayops_meat.hpp index 39b4742a5..b0cdc57a7 100644 --- a/Include/armadillo/armadillo_bits/arrayops_meat.hpp +++ b/Include/armadillo/armadillo_bits/arrayops_meat.hpp @@ -28,53 +28,7 @@ arrayops::copy(eT* dest, const eT* src, const uword n_elem) { if( (dest == src) || (n_elem == 0) ) { return; } - if(is_cx::no) - { - if(n_elem <= 9) - { - arrayops::copy_small(dest, src, n_elem); - } - else - { - std::memcpy(dest, src, n_elem*sizeof(eT)); - } - } - else - { - std::memcpy(dest, src, n_elem*sizeof(eT)); - } - } - - - -template -arma_cold -inline -void -arrayops::copy_small(eT* dest, const eT* src, const uword n_elem) - { - switch(n_elem) - { - case 9: dest[ 8] = src[ 8]; - // fallthrough - case 8: dest[ 7] = src[ 7]; - // fallthrough - case 7: dest[ 6] = src[ 6]; - // fallthrough - case 6: dest[ 5] = src[ 5]; - // fallthrough - case 5: dest[ 4] = src[ 4]; - // fallthrough - case 4: dest[ 3] = src[ 3]; - // fallthrough - case 3: dest[ 2] = src[ 2]; - // fallthrough - case 2: dest[ 1] = src[ 1]; - // fallthrough - case 1: dest[ 0] = src[ 0]; - // fallthrough - default: ; - } + std::memcpy(dest, src, n_elem*sizeof(eT)); } @@ -679,14 +633,7 @@ arrayops::inplace_set(eT* dest, const eT val, const uword n_elem) } else { - if( (n_elem <= 9) && (is_cx::no) ) - { - arrayops::inplace_set_small(dest, val, n_elem); - } - else - { - arrayops::inplace_set_simple(dest, val, n_elem); - } + arrayops::inplace_set_simple(dest, val, n_elem); } } @@ -745,38 +692,6 @@ arrayops::inplace_set_base(eT* dest, const eT val, const uword n_elem) -template -arma_cold -inline -void -arrayops::inplace_set_small(eT* dest, const eT val, const uword n_elem) - { - switch(n_elem) - { - case 9: dest[ 8] = val; - // fallthrough - case 8: dest[ 7] = val; - // fallthrough - case 7: dest[ 6] = val; - // fallthrough - case 6: dest[ 5] = val; - // fallthrough - case 5: dest[ 4] = val; - // fallthrough - case 4: dest[ 3] = val; - // fallthrough - case 3: dest[ 2] = val; - // fallthrough - case 2: dest[ 1] = val; - // fallthrough - case 1: dest[ 0] = val; - // fallthrough - default:; - } - } - - - template arma_hot inline diff --git a/Include/armadillo/armadillo_bits/auxlib_bones.hpp b/Include/armadillo/armadillo_bits/auxlib_bones.hpp index 7c5aa1654..3c888f3e4 100644 --- a/Include/armadillo/armadillo_bits/auxlib_bones.hpp +++ b/Include/armadillo/armadillo_bits/auxlib_bones.hpp @@ -259,13 +259,13 @@ class auxlib inline static bool solve_square_fast(Mat& out, Mat& A, const Base& B_expr); template - inline static bool solve_square_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool allow_ugly); + inline static bool solve_square_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr); template - inline static bool solve_square_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate, const bool allow_ugly); + inline static bool solve_square_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate); template - inline static bool solve_square_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate, const bool allow_ugly); + inline static bool solve_square_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate); // @@ -276,16 +276,16 @@ class auxlib inline static bool solve_sympd_fast_common(Mat& out, Mat& A, const Base& B_expr); template - inline static bool solve_sympd_rcond(Mat& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool allow_ugly); + inline static bool solve_sympd_rcond(Mat& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr); template - inline static bool solve_sympd_rcond(Mat< std::complex >& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base< std::complex,T1>& B_expr, const bool allow_ugly); + inline static bool solve_sympd_rcond(Mat< std::complex >& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base< std::complex,T1>& B_expr); template - inline static bool solve_sympd_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate, const bool allow_ugly); + inline static bool solve_sympd_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate); template - inline static bool solve_sympd_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate, const bool allow_ugly); + inline static bool solve_sympd_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate); // @@ -293,7 +293,7 @@ class auxlib inline static bool solve_rect_fast(Mat& out, Mat& A, const Base& B_expr); template - inline static bool solve_rect_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool allow_ugly); + inline static bool solve_rect_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr); // @@ -309,7 +309,7 @@ class auxlib inline static bool solve_trimat_fast(Mat& out, const Mat& A, const Base& B_expr, const uword layout); template - inline static bool solve_trimat_rcond(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const Base& B_expr, const uword layout, const bool allow_ugly); + inline static bool solve_trimat_rcond(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const Base& B_expr, const uword layout); // @@ -323,19 +323,19 @@ class auxlib inline static bool solve_band_fast_common(Mat& out, const Mat& A, const uword KL, const uword KU, const Base& B_expr); template - inline static bool solve_band_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool allow_ugly); + inline static bool solve_band_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr); template - inline static bool solve_band_rcond(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base< std::complex,T1>& B_expr, const bool allow_ugly); + inline static bool solve_band_rcond(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base< std::complex,T1>& B_expr); template - inline static bool solve_band_rcond_common(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool allow_ugly); + inline static bool solve_band_rcond_common(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const uword KL, const uword KU, const Base& B_expr); template - inline static bool solve_band_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool equilibrate, const bool allow_ugly); + inline static bool solve_band_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool equilibrate); template - inline static bool solve_band_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base,T1>& B_expr, const bool equilibrate, const bool allow_ugly); + inline static bool solve_band_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base,T1>& B_expr, const bool equilibrate); // @@ -428,9 +428,6 @@ class auxlib template inline static bool crippled_lapack(const Base&); - template - inline static typename T1::pod_type epsilon_lapack(const Base&); - template inline static bool rudimentary_sym_check(const Mat& X); diff --git a/Include/armadillo/armadillo_bits/auxlib_meat.hpp b/Include/armadillo/armadillo_bits/auxlib_meat.hpp index 0a9399ca7..b662a9f00 100644 --- a/Include/armadillo/armadillo_bits/auxlib_meat.hpp +++ b/Include/armadillo/armadillo_bits/auxlib_meat.hpp @@ -344,7 +344,7 @@ auxlib::inv_sympd_rcond(Mat& A, bool& out_sympd_state, eT& out_rcond, const out_rcond = auxlib::lu_rcond_sympd(A, norm_val); - if( (rcond_threshold > eT(0)) && (out_rcond < rcond_threshold) ) { return false; } + if( arma_isnan(out_rcond) || ((rcond_threshold > eT(0)) && (out_rcond < rcond_threshold)) ) { return false; } arma_extra_debug_print("lapack::potri()"); lapack::potri(&uplo, &n, A.memptr(), &n, &info); @@ -412,7 +412,7 @@ auxlib::inv_sympd_rcond(Mat< std::complex >& A, bool& out_sympd_state, T& out out_rcond = auxlib::lu_rcond_sympd(A, norm_val); - if( (rcond_threshold > T(0)) && (out_rcond < rcond_threshold) ) { return false; } + if( arma_isnan(out_rcond) || ((rcond_threshold > T(0)) && (out_rcond < rcond_threshold)) ) { return false; } arma_extra_debug_print("lapack::potri()"); lapack::potri(&uplo, &n, A.memptr(), &n, &info); @@ -3967,28 +3967,26 @@ auxlib::solve_square_fast(Mat& out, Mat ipiv(A_n_rows + 2); // +2 for paranoia: some versions of Lapack might be trashing memory + podarray ipiv(A.n_rows + 2); // +2 for paranoia: some versions of Lapack might be trashing memory arma_extra_debug_print("lapack::gesv()"); lapack::gesv(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info); @@ -4009,7 +4007,7 @@ auxlib::solve_square_fast(Mat& out, Mat inline bool -auxlib::solve_square_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool allow_ugly) +auxlib::solve_square_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); @@ -4024,9 +4022,9 @@ auxlib::solve_square_rcond(Mat& out, typename T1::pod_ty const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; - - arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" ); - + + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); + if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } arma_debug_assert_blas_size(A); @@ -4058,8 +4056,6 @@ auxlib::solve_square_rcond(Mat& out, typename T1::pod_ty out_rcond = auxlib::lu_rcond(A, norm_val); - if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; } - return true; } #else @@ -4068,7 +4064,6 @@ auxlib::solve_square_rcond(Mat& out, typename T1::pod_ty arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4081,7 +4076,7 @@ auxlib::solve_square_rcond(Mat& out, typename T1::pod_ty template inline bool -auxlib::solve_square_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate, const bool allow_ugly) +auxlib::solve_square_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); @@ -4101,7 +4096,7 @@ auxlib::solve_square_refine(Mat& out, typename T1::pod_ty const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } @@ -4156,7 +4151,7 @@ auxlib::solve_square_refine(Mat& out, typename T1::pod_ty out_rcond = rcond; - return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0); + return ((info == 0) || (info == (n+1))); } #else { @@ -4165,7 +4160,6 @@ auxlib::solve_square_refine(Mat& out, typename T1::pod_ty arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4178,7 +4172,7 @@ auxlib::solve_square_refine(Mat& out, typename T1::pod_ty template inline bool -auxlib::solve_square_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate, const bool allow_ugly) +auxlib::solve_square_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); @@ -4199,7 +4193,7 @@ auxlib::solve_square_refine(Mat< std::complex >& out, typ const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } @@ -4254,7 +4248,7 @@ auxlib::solve_square_refine(Mat< std::complex >& out, typ out_rcond = rcond; - return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0); + return ((info == 0) || (info == (n+1))); } #else { @@ -4263,7 +4257,6 @@ auxlib::solve_square_refine(Mat< std::complex >& out, typ arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4303,14 +4296,12 @@ auxlib::solve_sympd_fast_common(Mat& out, Mat& out, Mat& out, Mat inline bool -auxlib::solve_sympd_rcond(Mat& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool allow_ugly) +auxlib::solve_sympd_rcond(Mat& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); @@ -4364,7 +4355,7 @@ auxlib::solve_sympd_rcond(Mat& out, bool& out_sympd_state const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; - arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } @@ -4396,8 +4387,6 @@ auxlib::solve_sympd_rcond(Mat& out, bool& out_sympd_state out_rcond = auxlib::lu_rcond_sympd(A, norm_val); - if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; } - return true; } #else @@ -4407,7 +4396,6 @@ auxlib::solve_sympd_rcond(Mat& out, bool& out_sympd_state arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4420,7 +4408,7 @@ auxlib::solve_sympd_rcond(Mat& out, bool& out_sympd_state template inline bool -auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base< std::complex,T1>& B_expr, const bool allow_ugly) +auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& out_sympd_state, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base< std::complex,T1>& B_expr) { arma_extra_debug_sigprint(); @@ -4430,7 +4418,7 @@ auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& out_sympd_state = false; - return auxlib::solve_square_rcond(out, out_rcond, A, B_expr, allow_ugly); + return auxlib::solve_square_rcond(out, out_rcond, A, B_expr); } #elif defined(ARMA_USE_LAPACK) { @@ -4445,7 +4433,7 @@ auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; - arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } @@ -4477,8 +4465,6 @@ auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& out_rcond = auxlib::lu_rcond_sympd(A, norm_val); - if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; } - return true; } #else @@ -4488,7 +4474,6 @@ auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4501,7 +4486,7 @@ auxlib::solve_sympd_rcond(Mat< std::complex >& out, bool& template inline bool -auxlib::solve_sympd_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate, const bool allow_ugly) +auxlib::solve_sympd_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); @@ -4521,7 +4506,7 @@ auxlib::solve_sympd_refine(Mat& out, typename T1::pod_typ const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } @@ -4558,7 +4543,7 @@ auxlib::solve_sympd_refine(Mat& out, typename T1::pod_typ // NOTE: lapack::posvx() sets rcond to zero if A is not sympd out_rcond = rcond; - return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0); + return ((info == 0) || (info == (n+1))); } #else { @@ -4567,7 +4552,6 @@ auxlib::solve_sympd_refine(Mat& out, typename T1::pod_typ arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4580,7 +4564,7 @@ auxlib::solve_sympd_refine(Mat& out, typename T1::pod_typ template inline bool -auxlib::solve_sympd_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate, const bool allow_ugly) +auxlib::solve_sympd_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const Base,T1>& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); @@ -4588,7 +4572,7 @@ auxlib::solve_sympd_refine(Mat< std::complex >& out, type { arma_extra_debug_print("auxlib::solve_sympd_refine(): redirecting to auxlib::solve_square_refine() due to crippled LAPACK"); - return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate, allow_ugly); + return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate); } #elif defined(ARMA_USE_LAPACK) { @@ -4607,7 +4591,7 @@ auxlib::solve_sympd_refine(Mat< std::complex >& out, type const Mat& B = (use_copy) ? B_tmp : UB_M_as_Mat; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } @@ -4644,7 +4628,7 @@ auxlib::solve_sympd_refine(Mat< std::complex >& out, type // NOTE: lapack::cx_posvx() sets rcond to zero if A is not sympd out_rcond = rcond; - return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0); + return ((info == 0) || (info == (n+1))); } #else { @@ -4653,7 +4637,6 @@ auxlib::solve_sympd_refine(Mat< std::complex >& out, type arma_ignore(A); arma_ignore(B_expr); arma_ignore(equilibrate); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4677,7 +4660,7 @@ auxlib::solve_rect_fast(Mat& out, Mat U(B_expr.get_ref()); const Mat& B = U.M; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } @@ -4757,7 +4740,7 @@ auxlib::solve_rect_fast(Mat& out, Mat inline bool -auxlib::solve_rect_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr, const bool allow_ugly) +auxlib::solve_rect_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const Base& B_expr) { arma_extra_debug_sigprint(); @@ -4771,7 +4754,7 @@ auxlib::solve_rect_rcond(Mat& out, typename T1::pod_type const unwrap U(B_expr.get_ref()); const Mat& B = U.M; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } @@ -4842,8 +4825,6 @@ auxlib::solve_rect_rcond(Mat& out, typename T1::pod_type // determine quality of solution out_rcond = auxlib::rcond_trimat(R, 0); // 0: upper triangular; 1: lower triangular - - if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; } } else if(A.n_rows < A.n_cols) @@ -4865,8 +4846,6 @@ auxlib::solve_rect_rcond(Mat& out, typename T1::pod_type // determine quality of solution out_rcond = auxlib::rcond_trimat(L, 1); // 0: upper triangular; 1: lower triangular - - if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; } } if(tmp.n_rows == A.n_cols) @@ -4886,7 +4865,6 @@ auxlib::solve_rect_rcond(Mat& out, typename T1::pod_type arma_ignore(out_rcond); arma_ignore(A); arma_ignore(B_expr); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -4909,7 +4887,7 @@ auxlib::solve_approx_svd(Mat& out, Mat U(B_expr.get_ref()); const Mat& B = U.M; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } @@ -5030,7 +5008,7 @@ auxlib::solve_approx_svd(Mat< std::complex >& out, Mat< s const unwrap U(B_expr.get_ref()); const Mat& B = U.M; - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_cols, B.n_cols); return true; } @@ -5151,7 +5129,7 @@ auxlib::solve_trimat_fast(Mat& out, const Mat& out, const Mat inline bool -auxlib::solve_trimat_rcond(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const Base& B_expr, const uword layout, const bool allow_ugly) +auxlib::solve_trimat_rcond(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const Base& B_expr, const uword layout) { arma_extra_debug_sigprint(); @@ -5201,7 +5179,7 @@ auxlib::solve_trimat_rcond(Mat& out, typename T1::pod_ty const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; - arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_cols, B_n_cols); return true; } @@ -5222,8 +5200,6 @@ auxlib::solve_trimat_rcond(Mat& out, typename T1::pod_ty // determine quality of solution out_rcond = auxlib::rcond_trimat(A, layout); - if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(A)) ) { return false; } - return true; } #else @@ -5233,7 +5209,6 @@ auxlib::solve_trimat_rcond(Mat& out, typename T1::pod_ty arma_ignore(A); arma_ignore(B_expr); arma_ignore(layout); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -5298,7 +5273,7 @@ auxlib::solve_band_fast_common(Mat& out, const Mat& out, const Mat inline bool -auxlib::solve_band_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool allow_ugly) +auxlib::solve_band_rcond(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr) { arma_extra_debug_sigprint(); - return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr, allow_ugly); + return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr); } @@ -5360,7 +5335,7 @@ auxlib::solve_band_rcond(Mat& out, typename T1::pod_type& template inline bool -auxlib::solve_band_rcond(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base< std::complex,T1>& B_expr, const bool allow_ugly) +auxlib::solve_band_rcond(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base< std::complex,T1>& B_expr) { arma_extra_debug_sigprint(); @@ -5371,11 +5346,11 @@ auxlib::solve_band_rcond(Mat< std::complex >& out, typena arma_ignore(KL); arma_ignore(KU); - return auxlib::solve_square_rcond(out, out_rcond, A, B_expr, allow_ugly); + return auxlib::solve_square_rcond(out, out_rcond, A, B_expr); } #else { - return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr, allow_ugly); + return auxlib::solve_band_rcond_common(out, out_rcond, A, KL, KU, B_expr); } #endif } @@ -5386,7 +5361,7 @@ auxlib::solve_band_rcond(Mat< std::complex >& out, typena template inline bool -auxlib::solve_band_rcond_common(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool allow_ugly) +auxlib::solve_band_rcond_common(Mat& out, typename T1::pod_type& out_rcond, const Mat& A, const uword KL, const uword KU, const Base& B_expr) { arma_extra_debug_sigprint(); @@ -5402,7 +5377,7 @@ auxlib::solve_band_rcond_common(Mat& out, typename T1::p const uword B_n_rows = out.n_rows; const uword B_n_cols = out.n_cols; - arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B_n_rows), "solve(): number of rows in given matrices must be the same", [&](){ out.soft_reset(); } ); if(A.is_empty() || out.is_empty()) { out.zeros(A.n_rows, B_n_cols); return true; } @@ -5444,8 +5419,6 @@ auxlib::solve_band_rcond_common(Mat& out, typename T1::p out_rcond = auxlib::lu_rcond_band(AB, KL, KU, ipiv, norm_val); - if( (allow_ugly == false) && (out_rcond < auxlib::epsilon_lapack(AB)) ) { return false; } - return true; } #else @@ -5456,7 +5429,6 @@ auxlib::solve_band_rcond_common(Mat& out, typename T1::p arma_ignore(KL); arma_ignore(KU); arma_ignore(B_expr); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -5469,7 +5441,7 @@ auxlib::solve_band_rcond_common(Mat& out, typename T1::p template inline bool -auxlib::solve_band_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool equilibrate, const bool allow_ugly) +auxlib::solve_band_refine(Mat& out, typename T1::pod_type& out_rcond, Mat& A, const uword KL, const uword KU, const Base& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); @@ -5479,7 +5451,7 @@ auxlib::solve_band_refine(Mat& out, typename T1::pod_type Mat B = B_expr.get_ref(); // B is overwritten - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } @@ -5540,7 +5512,7 @@ auxlib::solve_band_refine(Mat& out, typename T1::pod_type out_rcond = rcond; - return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0); + return ((info == 0) || (info == (n+1))); } #else { @@ -5551,7 +5523,6 @@ auxlib::solve_band_refine(Mat& out, typename T1::pod_type arma_ignore(KU); arma_ignore(B_expr); arma_ignore(equilibrate); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -5564,7 +5535,7 @@ auxlib::solve_band_refine(Mat& out, typename T1::pod_type template inline bool -auxlib::solve_band_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base,T1>& B_expr, const bool equilibrate, const bool allow_ugly) +auxlib::solve_band_refine(Mat< std::complex >& out, typename T1::pod_type& out_rcond, Mat< std::complex >& A, const uword KL, const uword KU, const Base,T1>& B_expr, const bool equilibrate) { arma_extra_debug_sigprint(); @@ -5575,7 +5546,7 @@ auxlib::solve_band_refine(Mat< std::complex >& out, typen arma_ignore(KL); arma_ignore(KU); - return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate, allow_ugly); + return auxlib::solve_square_refine(out, out_rcond, A, B_expr, equilibrate); } #elif defined(ARMA_USE_LAPACK) { @@ -5584,7 +5555,7 @@ auxlib::solve_band_refine(Mat< std::complex >& out, typen Mat B = B_expr.get_ref(); // B is overwritten - arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in the given matrices must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "solve(): number of rows in given matrices must be the same" ); if(A.is_empty() || B.is_empty()) { out.zeros(A.n_rows, B.n_cols); return true; } @@ -5645,7 +5616,7 @@ auxlib::solve_band_refine(Mat< std::complex >& out, typen out_rcond = rcond; - return (allow_ugly) ? ((info == 0) || (info == (n+1))) : (info == 0); + return ((info == 0) || (info == (n+1))); } #else { @@ -5656,7 +5627,6 @@ auxlib::solve_band_refine(Mat< std::complex >& out, typen arma_ignore(KU); arma_ignore(B_expr); arma_ignore(equilibrate); - arma_ignore(allow_ugly); arma_stop_logic_error("solve(): use of LAPACK must be enabled"); return false; } @@ -5718,7 +5688,7 @@ auxlib::solve_tridiag_fast_common(Mat& out, const Mat& A, Mat& B, Mat& vsl, Mat& vsr, const Base& X_e A = X_expr.get_ref(); B = Y_expr.get_ref(); - arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized" ); + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } ); arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" ); @@ -6039,7 +6009,7 @@ auxlib::qz(Mat< std::complex >& A, Mat< std::complex >& B, Mat< std::compl A = X_expr.get_ref(); B = Y_expr.get_ref(); - arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized" ); + arma_debug_check( ((A.is_square() == false) || (B.is_square() == false)), "qz(): given matrices must be square sized", [&](){ A.soft_reset(); B.soft_reset(); } ); arma_debug_check( (A.n_rows != B.n_rows), "qz(): given matrices must have the same size" ); @@ -6653,58 +6623,6 @@ auxlib::crippled_lapack(const Base&) -template -inline -typename T1::pod_type -auxlib::epsilon_lapack(const Base&) - { - typedef typename T1::pod_type T; - - return T(0.5)*std::numeric_limits::epsilon(); - - // value reverse engineered from dgesvx.f and dlamch.f - // http://www.netlib.org/lapack/explore-html/da/d21/dgesvx_8f.html - // http://www.netlib.org/lapack/explore-html/d5/dd4/dlamch_8f.html - // - // Fortran epsilon(X) function: - // https://gcc.gnu.org/onlinedocs/gfortran/EPSILON.html - // "EPSILON(X) returns the smallest number E of the same kind as X such that 1 + E > 1" - // - // C++ std::numeric_limits::epsilon() function: - // https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon - // "the difference between 1.0 and the next value representable by the floating-point type T" - // - // extract from dgesvx.f: - // - // IF( rcond.LT.dlamch( 'Epsilon' ) ) - // info = n + 1 - // RETURN - // - // extract from dlamch.f: - // - // * rnd = 1.0 when rounding occurs in addition, 0.0 otherwise - // ... - // * Assume rounding, not chopping. Always - // - // rnd = one - // - // IF( one.EQ.rnd ) THEN - // eps = epsilon(zero) * 0.5 - // ELSE - // eps = epsilon(zero) - // END IF - // ... - // IF( lsame( cmach, 'E' ) ) THEN - // rmach = eps - // ... - // END IF - // ... - // dlamch = rmach - // RETURN - } - - - template inline bool diff --git a/Include/armadillo/armadillo_bits/compiler_setup.hpp b/Include/armadillo/armadillo_bits/compiler_setup.hpp index dbe77a914..1927b9769 100644 --- a/Include/armadillo/armadillo_bits/compiler_setup.hpp +++ b/Include/armadillo/armadillo_bits/compiler_setup.hpp @@ -267,12 +267,12 @@ #define arma_hot __attribute__((__hot__)) #endif - #if __has_attribute(__minsize__) - #undef arma_cold - #define arma_cold __attribute__((__minsize__)) - #elif __has_attribute(__cold__) + #if __has_attribute(__cold__) #undef arma_cold #define arma_cold __attribute__((__cold__)) + #elif __has_attribute(__minsize__) + #undef arma_cold + #define arma_cold __attribute__((__minsize__)) #endif #if defined(__has_builtin) && __has_builtin(__builtin_assume_aligned) @@ -465,3 +465,11 @@ #undef minor #undef major + + +// optionally allow disabling of compile-time deprecation messages (not recommended) + +#if defined(ARMA_IGNORE_DEPRECATED_MARKER) && (!defined(ARMA_DONT_IGNORE_DEPRECATED_MARKER)) && (!defined(ARMA_EXTRA_DEBUG)) + #undef arma_deprecated + #define arma_deprecated +#endif diff --git a/Include/armadillo/armadillo_bits/config.hpp b/Include/armadillo/armadillo_bits/config.hpp index 575b5e29d..9de142d2c 100644 --- a/Include/armadillo/armadillo_bits/config.hpp +++ b/Include/armadillo/armadillo_bits/config.hpp @@ -22,7 +22,7 @@ #endif //// The level of warning messages printed to ARMA_CERR_STREAM. //// Must be an integer >= 0. The default value is 2. -//// 0 = no warnings +//// 0 = no warnings; generally not recommended //// 1 = only critical warnings about arguments and/or data which are likely to lead to incorrect results //// 2 = as per level 1, and warnings about poorly conditioned systems (low rcond) detected by solve(), spsolve(), etc //// 3 = as per level 2, and warnings about failed decompositions, failed saving/loading, etc @@ -105,13 +105,13 @@ //// These "hidden" arguments are typically tacked onto the end of function definitions. // #define ARMA_USE_TBB_ALLOC -//// Uncomment the above line if you want to use Intel TBB scalable_malloc() and scalable_free() instead of standard malloc() and free() +//// Uncomment the above line to use Intel TBB scalable_malloc() and scalable_free() instead of standard malloc() and free() // #define ARMA_USE_MKL_ALLOC -//// Uncomment the above line if you want to use Intel MKL mkl_malloc() and mkl_free() instead of standard malloc() and free() +//// Uncomment the above line to use Intel MKL mkl_malloc() and mkl_free() instead of standard malloc() and free() // #define ARMA_USE_MKL_TYPES -//// Uncomment the above line if you want to use Intel MKL types for complex numbers. +//// Uncomment the above line to use Intel MKL types for complex numbers. //// You will need to include appropriate MKL headers before the Armadillo header. //// You may also need to enable or disable the following options: //// ARMA_BLAS_LONG, ARMA_BLAS_LONG_LONG, ARMA_USE_FORTRAN_HIDDEN_ARGS @@ -136,31 +136,31 @@ #endif #if !defined(ARMA_OPTIMISE_BAND) - // #define ARMA_OPTIMISE_BAND - //// Comment out the above line if you don't want automatically optimised handling + #define ARMA_OPTIMISE_BAND + //// Comment out the above line to disable optimised handling //// of band matrices by solve() and chol() #endif #if !defined(ARMA_OPTIMISE_SYMPD) #define ARMA_OPTIMISE_SYMPD - //// Comment out the above line if you don't want automatically optimised handling + //// Comment out the above line to disable optimised handling //// of symmetric/hermitian positive definite matrices by various functions: //// solve(), inv(), pinv(), expmat(), logmat(), sqrtmat(), rcond(), rank() #endif #if !defined(ARMA_OPTIMISE_INVEXPR) #define ARMA_OPTIMISE_INVEXPR - //// Comment out the above line if you don't want automatically optimised handling + //// Comment out the above line to disable optimised handling //// of inv() and inv_sympd() within compound expressions #endif #if !defined(ARMA_CHECK_NONFINITE) #define ARMA_CHECK_NONFINITE - //// Comment out the above line if you don't want automatic checking for nonfinite matrices + //// Comment out the above line to disable checking for nonfinite matrices #endif -// #define ARMA_USE_HDF5_ALT -#if defined(ARMA_USE_HDF5_ALT) && defined(ARMA_USE_WRAPPER) +// #define ARMA_USE_HDF5_CMAKE +#if defined(ARMA_USE_HDF5_CMAKE) && defined(ARMA_USE_WRAPPER) #undef ARMA_USE_HDF5 #define ARMA_USE_HDF5 @@ -168,7 +168,7 @@ #endif #if !defined(ARMA_MAT_PREALLOC) -#define ARMA_MAT_PREALLOC 4 +#define ARMA_MAT_PREALLOC 6 #endif //// This is the number of preallocated elements used by matrices and vectors; //// it must be an integer that is at least 1. @@ -176,7 +176,7 @@ //// change the number to the size of your vectors. #if !defined(ARMA_OPENMP_THRESHOLD) -#define ARMA_OPENMP_THRESHOLD 320 +#define ARMA_OPENMP_THRESHOLD 400 #endif //// The minimum number of elements in a matrix to allow OpenMP based parallelisation; //// it must be an integer that is at least 1. @@ -188,17 +188,22 @@ //// it must be an integer that is at least 1. // #define ARMA_NO_DEBUG -//// Uncomment the above line if you want to disable all run-time checks. -//// This will result in faster code, but you first need to make sure that your code runs correctly! -//// We strongly recommend to have the run-time checks enabled during development, -//// as this greatly aids in finding mistakes in your code, and hence speeds up development. -//// We recommend that run-time checks be disabled _only_ for the shipped version of your program. +//// Uncomment the above line to disable all run-time checks. NOT RECOMMENDED. +//// It is strongly recommended that run-time checks are enabled during development, +//// as this greatly aids in finding mistakes in your code. // #define ARMA_EXTRA_DEBUG -//// Uncomment the above line if you want to see the function traces of how Armadillo evaluates expressions. +//// Uncomment the above line to see the function traces of how Armadillo evaluates expressions. //// This is mainly useful for debugging of the library. +#if defined(ARMA_EXTRA_DEBUG) + #undef ARMA_NO_DEBUG + #undef ARMA_WARN_LEVEL + #define ARMA_WARN_LEVEL 3 +#endif + + #if defined(ARMA_DEFAULT_OSTREAM) #pragma message ("WARNING: support for ARMA_DEFAULT_OSTREAM is deprecated and will be removed;") #pragma message ("WARNING: use ARMA_COUT_STREAM and ARMA_CERR_STREAM instead") @@ -263,7 +268,7 @@ #if defined(ARMA_DONT_USE_WRAPPER) #undef ARMA_USE_WRAPPER - #undef ARMA_USE_HDF5_ALT + #undef ARMA_USE_HDF5_CMAKE #endif #if defined(ARMA_DONT_USE_FORTRAN_HIDDEN_ARGS) @@ -310,7 +315,7 @@ #if defined(ARMA_DONT_USE_HDF5) #undef ARMA_USE_HDF5 - #undef ARMA_USE_HDF5_ALT + #undef ARMA_USE_HDF5_CMAKE #endif #if defined(ARMA_DONT_OPTIMISE_BAND) || defined(ARMA_DONT_OPTIMISE_SOLVE_BAND) @@ -329,10 +334,17 @@ #undef ARMA_CHECK_NONFINITE #endif -// #if defined(ARMA_DONT_PRINT_ERRORS) -// #pragma message ("WARNING: support for ARMA_DONT_PRINT_ERRORS option has been removed;") -// #pragma message ("WARNING: use ARMA_WARN_LEVEL and ARMA_DONT_PRINT_EXCEPTIONS options instead.") -// #endif +#if defined(ARMA_DONT_PRINT_ERRORS) + #pragma message ("INFO: support for ARMA_DONT_PRINT_ERRORS option has been removed") + + #if defined(ARMA_PRINT_EXCEPTIONS) + #pragma message ("INFO: suggest to use ARMA_WARN_LEVEL and ARMA_DONT_PRINT_EXCEPTIONS options instead") + #else + #pragma message ("INFO: suggest to use ARMA_WARN_LEVEL option instead") + #endif + + #pragma message ("INFO: see the documentation for details") +#endif #if defined(ARMA_DONT_PRINT_EXCEPTIONS) #undef ARMA_PRINT_EXCEPTIONS diff --git a/Include/armadillo/armadillo_bits/config.hpp.cmake b/Include/armadillo/armadillo_bits/config.hpp.cmake index 680ab1aa8..07c85b837 100644 --- a/Include/armadillo/armadillo_bits/config.hpp.cmake +++ b/Include/armadillo/armadillo_bits/config.hpp.cmake @@ -22,7 +22,7 @@ #endif //// The level of warning messages printed to ARMA_CERR_STREAM. //// Must be an integer >= 0. The default value is 2. -//// 0 = no warnings +//// 0 = no warnings; generally not recommended //// 1 = only critical warnings about arguments and/or data which are likely to lead to incorrect results //// 2 = as per level 1, and warnings about poorly conditioned systems (low rcond) detected by solve(), spsolve(), etc //// 3 = as per level 2, and warnings about failed decompositions, failed saving/loading, etc @@ -105,13 +105,13 @@ //// These "hidden" arguments are typically tacked onto the end of function definitions. // #define ARMA_USE_TBB_ALLOC -//// Uncomment the above line if you want to use Intel TBB scalable_malloc() and scalable_free() instead of standard malloc() and free() +//// Uncomment the above line to use Intel TBB scalable_malloc() and scalable_free() instead of standard malloc() and free() // #define ARMA_USE_MKL_ALLOC -//// Uncomment the above line if you want to use Intel MKL mkl_malloc() and mkl_free() instead of standard malloc() and free() +//// Uncomment the above line to use Intel MKL mkl_malloc() and mkl_free() instead of standard malloc() and free() // #define ARMA_USE_MKL_TYPES -//// Uncomment the above line if you want to use Intel MKL types for complex numbers. +//// Uncomment the above line to use Intel MKL types for complex numbers. //// You will need to include appropriate MKL headers before the Armadillo header. //// You may also need to enable or disable the following options: //// ARMA_BLAS_LONG, ARMA_BLAS_LONG_LONG, ARMA_USE_FORTRAN_HIDDEN_ARGS @@ -137,30 +137,30 @@ #if !defined(ARMA_OPTIMISE_BAND) #define ARMA_OPTIMISE_BAND - //// Comment out the above line if you don't want automatically optimised handling + //// Comment out the above line to disable optimised handling //// of band matrices by solve() and chol() #endif #if !defined(ARMA_OPTIMISE_SYMPD) #define ARMA_OPTIMISE_SYMPD - //// Comment out the above line if you don't want automatically optimised handling + //// Comment out the above line to disable optimised handling //// of symmetric/hermitian positive definite matrices by various functions: //// solve(), inv(), pinv(), expmat(), logmat(), sqrtmat(), rcond(), rank() #endif #if !defined(ARMA_OPTIMISE_INVEXPR) #define ARMA_OPTIMISE_INVEXPR - //// Comment out the above line if you don't want automatically optimised handling + //// Comment out the above line to disable optimised handling //// of inv() and inv_sympd() within compound expressions #endif #if !defined(ARMA_CHECK_NONFINITE) #define ARMA_CHECK_NONFINITE - //// Comment out the above line if you don't want automatic checking for nonfinite matrices + //// Comment out the above line to disable checking for nonfinite matrices #endif -// #define ARMA_USE_HDF5_ALT -#if defined(ARMA_USE_HDF5_ALT) && defined(ARMA_USE_WRAPPER) +#cmakedefine ARMA_USE_HDF5_CMAKE +#if defined(ARMA_USE_HDF5_CMAKE) && defined(ARMA_USE_WRAPPER) #undef ARMA_USE_HDF5 #define ARMA_USE_HDF5 @@ -188,17 +188,22 @@ //// it must be an integer that is at least 1. // #define ARMA_NO_DEBUG -//// Uncomment the above line if you want to disable all run-time checks. -//// This will result in faster code, but you first need to make sure that your code runs correctly! -//// We strongly recommend to have the run-time checks enabled during development, -//// as this greatly aids in finding mistakes in your code, and hence speeds up development. -//// We recommend that run-time checks be disabled _only_ for the shipped version of your program. +//// Uncomment the above line to disable all run-time checks. NOT RECOMMENDED. +//// It is strongly recommended that run-time checks are enabled during development, +//// as this greatly aids in finding mistakes in your code. // #define ARMA_EXTRA_DEBUG -//// Uncomment the above line if you want to see the function traces of how Armadillo evaluates expressions. +//// Uncomment the above line to see the function traces of how Armadillo evaluates expressions. //// This is mainly useful for debugging of the library. +#if defined(ARMA_EXTRA_DEBUG) + #undef ARMA_NO_DEBUG + #undef ARMA_WARN_LEVEL + #define ARMA_WARN_LEVEL 3 +#endif + + #if defined(ARMA_DEFAULT_OSTREAM) #pragma message ("WARNING: support for ARMA_DEFAULT_OSTREAM is deprecated and will be removed;") #pragma message ("WARNING: use ARMA_COUT_STREAM and ARMA_CERR_STREAM instead") @@ -263,7 +268,7 @@ #if defined(ARMA_DONT_USE_WRAPPER) #undef ARMA_USE_WRAPPER - #undef ARMA_USE_HDF5_ALT + #undef ARMA_USE_HDF5_CMAKE #endif #if defined(ARMA_DONT_USE_FORTRAN_HIDDEN_ARGS) @@ -310,7 +315,7 @@ #if defined(ARMA_DONT_USE_HDF5) #undef ARMA_USE_HDF5 - #undef ARMA_USE_HDF5_ALT + #undef ARMA_USE_HDF5_CMAKE #endif #if defined(ARMA_DONT_OPTIMISE_BAND) || defined(ARMA_DONT_OPTIMISE_SOLVE_BAND) @@ -329,10 +334,17 @@ #undef ARMA_CHECK_NONFINITE #endif -// #if defined(ARMA_DONT_PRINT_ERRORS) -// #pragma message ("WARNING: support for ARMA_DONT_PRINT_ERRORS option has been removed;") -// #pragma message ("WARNING: use ARMA_WARN_LEVEL and ARMA_DONT_PRINT_EXCEPTIONS options instead.") -// #endif +#if defined(ARMA_DONT_PRINT_ERRORS) + #pragma message ("INFO: support for ARMA_DONT_PRINT_ERRORS option has been removed") + + #if defined(ARMA_PRINT_EXCEPTIONS) + #pragma message ("INFO: suggest to use ARMA_WARN_LEVEL and ARMA_DONT_PRINT_EXCEPTIONS options instead") + #else + #pragma message ("INFO: suggest to use ARMA_WARN_LEVEL option instead") + #endif + + #pragma message ("INFO: see the documentation for details") +#endif #if defined(ARMA_DONT_PRINT_EXCEPTIONS) #undef ARMA_PRINT_EXCEPTIONS diff --git a/Include/armadillo/armadillo_bits/debug.hpp b/Include/armadillo/armadillo_bits/debug.hpp index 162496858..6b6fe005a 100644 --- a/Include/armadillo/armadillo_bits/debug.hpp +++ b/Include/armadillo/armadillo_bits/debug.hpp @@ -448,6 +448,16 @@ arma_check(const bool state, const T1& x) } +template +arma_hot +inline +void +arma_check(const bool state, const char* x, const Functor& fn) + { + if(state) { fn(); arma_stop_logic_error(x); } + } + + arma_hot inline void @@ -457,6 +467,16 @@ arma_check(const bool state, const char* x, const char* y) } +template +arma_hot +inline +void +arma_check(const bool state, const char* x, const char* y, const Functor& fn) + { + if(state) { fn(); arma_stop_logic_error(x,y); } + } + + template arma_hot inline @@ -1368,19 +1388,16 @@ arma_assert_atlas_size(const T1& A, const T2& B) #if defined(ARMA_EXTRA_DEBUG) - #undef ARMA_WARN_LEVEL - #define ARMA_WARN_LEVEL 3 - #define arma_extra_debug_sigprint arma_sigprint(ARMA_FNSIG); arma_bktprint #define arma_extra_debug_sigprint_this arma_sigprint(ARMA_FNSIG); arma_thisprint #define arma_extra_debug_print arma_print - + #else #define arma_extra_debug_sigprint true ? (void)0 : arma_bktprint #define arma_extra_debug_sigprint_this true ? (void)0 : arma_thisprint #define arma_extra_debug_print true ? (void)0 : arma_print - + #endif @@ -1438,6 +1455,7 @@ arma_assert_atlas_size(const T1& A, const T2& B) out << "@ arma_config::optimise_sympd = " << arma_config::optimise_sympd << '\n'; out << "@ arma_config::optimise_invexpr = " << arma_config::optimise_invexpr << '\n'; out << "@ arma_config::check_nonfinite = " << arma_config::check_nonfinite << '\n'; + out << "@ arma_config::zero_init = " << arma_config::zero_init << '\n'; out << "@ sizeof(void*) = " << sizeof(void*) << '\n'; out << "@ sizeof(int) = " << sizeof(int) << '\n'; out << "@ sizeof(long) = " << sizeof(long) << '\n'; diff --git a/Include/armadillo/armadillo_bits/def_hdf5.hpp b/Include/armadillo/armadillo_bits/def_hdf5.hpp index f59c76a2b..cb4c3f427 100644 --- a/Include/armadillo/armadillo_bits/def_hdf5.hpp +++ b/Include/armadillo/armadillo_bits/def_hdf5.hpp @@ -18,7 +18,7 @@ #if defined(ARMA_USE_HDF5) -#if !defined(ARMA_USE_HDF5_ALT) +#if !defined(ARMA_USE_HDF5_CMAKE) // macros needed if the wrapper run-time library is not being used diff --git a/Include/armadillo/armadillo_bits/distr_param.hpp b/Include/armadillo/armadillo_bits/distr_param.hpp index 3f4941789..61f3c2346 100644 --- a/Include/armadillo/armadillo_bits/distr_param.hpp +++ b/Include/armadillo/armadillo_bits/distr_param.hpp @@ -26,41 +26,64 @@ class distr_param { public: - uword state; + const uword state; - union - { - int a_int; - double a_double; - }; + private: - union - { - int b_int; - double b_double; - }; + int a_int; + int b_int; + + double a_double; + double b_double; + public: inline distr_param() - : state(0) + : state (0) + , a_int (0) + , b_int (0) + , a_double(0) + , b_double(0) { } inline explicit distr_param(const int a, const int b) - : state(1) - , a_int(a) - , b_int(b) + : state (1) + , a_int (a) + , b_int (b) + , a_double(double(a)) + , b_double(double(b)) { } inline explicit distr_param(const double a, const double b) - : state(2) + : state (2) + , a_int (int(a)) + , b_int (int(b)) , a_double(a) , b_double(b) { } + + + inline void get_int_vals(int& out_a, int& out_b) const + { + if(state == 0) { return; } + + out_a = a_int; + out_b = b_int; + } + + + inline void get_double_vals(double& out_a, double& out_b) const + { + if(state == 0) { return; } + + out_a = a_double; + out_b = b_double; + } }; diff --git a/Include/armadillo/armadillo_bits/eglue_core_meat.hpp b/Include/armadillo/armadillo_bits/eglue_core_meat.hpp index 8e21bf03c..1978ecba8 100644 --- a/Include/armadillo/armadillo_bits/eglue_core_meat.hpp +++ b/Include/armadillo/armadillo_bits/eglue_core_meat.hpp @@ -264,8 +264,8 @@ eglue_core::apply(outT& out, const eGlue& x) typedef typename T1::elem_type eT; - const bool use_at = (Proxy::use_at || Proxy::use_at); - const bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); + constexpr bool use_at = (Proxy::use_at || Proxy::use_at); + constexpr bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); // NOTE: we're assuming that the matrix has already been set to the correct size and there is no aliasing; // size setting and alias checking is done by either the Mat contructor or operator=() @@ -371,8 +371,8 @@ eglue_core::apply_inplace_plus(Mat& out, con eT* out_mem = out.memptr(); - const bool use_at = (Proxy::use_at || Proxy::use_at); - const bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); + constexpr bool use_at = (Proxy::use_at || Proxy::use_at); + constexpr bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); if(use_at == false) { @@ -469,8 +469,8 @@ eglue_core::apply_inplace_minus(Mat& out, co eT* out_mem = out.memptr(); - const bool use_at = (Proxy::use_at || Proxy::use_at); - const bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); + constexpr bool use_at = (Proxy::use_at || Proxy::use_at); + constexpr bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); if(use_at == false) { @@ -567,8 +567,8 @@ eglue_core::apply_inplace_schur(Mat& out, co eT* out_mem = out.memptr(); - const bool use_at = (Proxy::use_at || Proxy::use_at); - const bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); + constexpr bool use_at = (Proxy::use_at || Proxy::use_at); + constexpr bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); if(use_at == false) { @@ -665,8 +665,8 @@ eglue_core::apply_inplace_div(Mat& out, cons eT* out_mem = out.memptr(); - const bool use_at = (Proxy::use_at || Proxy::use_at); - const bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); + constexpr bool use_at = (Proxy::use_at || Proxy::use_at); + constexpr bool use_mp = (Proxy::use_mp || Proxy::use_mp) && (arma_config::openmp); if(use_at == false) { @@ -761,8 +761,8 @@ eglue_core::apply(Cube& out, const eGlueCube typedef typename T1::elem_type eT; - const bool use_at = (ProxyCube::use_at || ProxyCube::use_at); - const bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); + constexpr bool use_at = (ProxyCube::use_at || ProxyCube::use_at); + constexpr bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); // NOTE: we're assuming that the cube has already been set to the correct size and there is no aliasing; // size setting and alias checking is done by either the Cube contructor or operator=() @@ -870,8 +870,8 @@ eglue_core::apply_inplace_plus(Cube& out, co eT* out_mem = out.memptr(); - const bool use_at = (ProxyCube::use_at || ProxyCube::use_at); - const bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); + constexpr bool use_at = (ProxyCube::use_at || ProxyCube::use_at); + constexpr bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); if(use_at == false) { @@ -969,8 +969,8 @@ eglue_core::apply_inplace_minus(Cube& out, c eT* out_mem = out.memptr(); - const bool use_at = (ProxyCube::use_at || ProxyCube::use_at); - const bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); + constexpr bool use_at = (ProxyCube::use_at || ProxyCube::use_at); + constexpr bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); if(use_at == false) { @@ -1068,8 +1068,8 @@ eglue_core::apply_inplace_schur(Cube& out, c eT* out_mem = out.memptr(); - const bool use_at = (ProxyCube::use_at || ProxyCube::use_at); - const bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); + constexpr bool use_at = (ProxyCube::use_at || ProxyCube::use_at); + constexpr bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); if(use_at == false) { @@ -1167,8 +1167,8 @@ eglue_core::apply_inplace_div(Cube& out, con eT* out_mem = out.memptr(); - const bool use_at = (ProxyCube::use_at || ProxyCube::use_at); - const bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); + constexpr bool use_at = (ProxyCube::use_at || ProxyCube::use_at); + constexpr bool use_mp = (ProxyCube::use_mp || ProxyCube::use_mp) && (arma_config::openmp); if(use_at == false) { diff --git a/Include/armadillo/armadillo_bits/eop_aux.hpp b/Include/armadillo/armadillo_bits/eop_aux.hpp index fb96024fa..2b66ef2a6 100644 --- a/Include/armadillo/armadillo_bits/eop_aux.hpp +++ b/Include/armadillo/armadillo_bits/eop_aux.hpp @@ -92,7 +92,7 @@ class eop_aux template arma_inline static typename arma_real_only::result trunc (const eT x) { return std::trunc(x); } template arma_inline static typename arma_cx_only::result trunc (const eT& x) { return eT( std::trunc(x.real()), std::trunc(x.imag()) ); } - template arma_inline static typename arma_integral_only::result log2 (const eT x) { return eT( std::log(double(x))/ double(0.69314718055994530942) ); } + template arma_inline static typename arma_integral_only::result log2 (const eT x) { return eT( std::log2(double(x)) ); } template arma_inline static typename arma_real_only::result log2 (const eT x) { return std::log2(x); } template arma_inline static typename arma_cx_only::result log2 (const eT& x) { typedef typename get_pod_type::result T; return std::log(x) / T(0.69314718055994530942); } @@ -100,7 +100,7 @@ class eop_aux template arma_inline static typename arma_real_only::result log1p (const eT x) { return std::log1p(x); } template arma_inline static typename arma_cx_only::result log1p (const eT& x) { arma_ignore(x); return eT(0); } - template arma_inline static typename arma_integral_only::result exp2 (const eT x) { return eT( std::pow(double(2), double(x)) ); } + template arma_inline static typename arma_integral_only::result exp2 (const eT x) { return eT( std::exp2(double(x)) ); } template arma_inline static typename arma_real_only::result exp2 (const eT x) { return std::exp2(x); } template arma_inline static typename arma_cx_only::result exp2 (const eT& x) { typedef typename get_pod_type::result T; return std::pow( T(2), x); } diff --git a/Include/armadillo/armadillo_bits/field_bones.hpp b/Include/armadillo/armadillo_bits/field_bones.hpp index c7dfc6f2b..303cac4a6 100644 --- a/Include/armadillo/armadillo_bits/field_bones.hpp +++ b/Include/armadillo/armadillo_bits/field_bones.hpp @@ -127,8 +127,8 @@ class field arma_inline arma_warn_unused const oT& back() const; - arma_cold inline field_injector operator<<(const oT& val); - arma_cold inline field_injector operator<<(const injector_end_of_row<>& x); + arma_deprecated inline field_injector operator<<(const oT& val); + arma_deprecated inline field_injector operator<<(const injector_end_of_row<>& x); inline subview_field row(const uword row_num); diff --git a/Include/armadillo/armadillo_bits/field_meat.hpp b/Include/armadillo/armadillo_bits/field_meat.hpp index 22bb45031..6ab67999c 100644 --- a/Include/armadillo/armadillo_bits/field_meat.hpp +++ b/Include/armadillo/armadillo_bits/field_meat.hpp @@ -427,6 +427,8 @@ field::operator=(field&& X) { arma_extra_debug_sigprint(arma_str::format("this = %x X = %x") % this % &X); + if(this == &X) { return *this; } + reset(); access::rw(n_rows ) = X.n_rows; @@ -758,7 +760,7 @@ field::back() const template -arma_cold +arma_deprecated inline field_injector< field > field::operator<<(const oT& val) @@ -769,7 +771,7 @@ field::operator<<(const oT& val) template -arma_cold +arma_deprecated inline field_injector< field > field::operator<<(const injector_end_of_row<>& x) diff --git a/Include/armadillo/armadillo_bits/fn_accu.hpp b/Include/armadillo/armadillo_bits/fn_accu.hpp index b69fc3223..fabf6c676 100644 --- a/Include/armadillo/armadillo_bits/fn_accu.hpp +++ b/Include/armadillo/armadillo_bits/fn_accu.hpp @@ -845,23 +845,36 @@ accu(const SpBase& expr) const SpProxy P(expr.get_ref()); + const uword N = P.get_n_nonzero(); + + if(N == 0) { return eT(0); } + if(SpProxy::use_iterator == false) { // direct counting - return arrayops::accumulate(P.get_values(), P.get_n_nonzero()); + return arrayops::accumulate(P.get_values(), N); } - else + + if(is_SpSubview::stored_type>::value) { - typename SpProxy::const_iterator_type it = P.begin(); + const SpSubview& sv = reinterpret_cast< const SpSubview& >(P.Q); - const uword P_n_nz = P.get_n_nonzero(); - - eT val = eT(0); - - for(uword i=0; i < P_n_nz; ++i) { val += (*it); ++it; } - - return val; + if(sv.n_rows == sv.m.n_rows) + { + const SpMat& m = sv.m; + const uword col = sv.aux_col1; + + return arrayops::accumulate(&(m.values[ m.col_ptrs[col] ]), N); + } } + + typename SpProxy::const_iterator_type it = P.begin(); + + eT val = eT(0); + + for(uword i=0; i < N; ++i) { val += (*it); ++it; } + + return val; } diff --git a/Include/armadillo/armadillo_bits/fn_as_scalar.hpp b/Include/armadillo/armadillo_bits/fn_as_scalar.hpp index 4c02004e5..35ef58cb4 100644 --- a/Include/armadillo/armadillo_bits/fn_as_scalar.hpp +++ b/Include/armadillo/armadillo_bits/fn_as_scalar.hpp @@ -328,40 +328,6 @@ as_scalar(const Base& X) } -template -arma_warn_unused -inline -typename T1::elem_type -as_scalar(const Gen& X) - { - arma_extra_debug_sigprint(); - - typedef typename T1::elem_type eT; - - arma_debug_check( ((X.n_rows != 1) || (X.n_cols != 1)), "as_scalar(): expression must evaluate to exactly one element" ); - - return eT(arma_rng::randu()); - } - - - -template -arma_warn_unused -inline -typename T1::elem_type -as_scalar(const Gen& X) - { - arma_extra_debug_sigprint(); - - typedef typename T1::elem_type eT; - - arma_debug_check( ((X.n_rows != 1) || (X.n_cols != 1)), "as_scalar(): expression must evaluate to exactly one element" ); - - return eT(arma_rng::randn()); - } - - - template arma_warn_unused inline diff --git a/Include/armadillo/armadillo_bits/fn_chol.hpp b/Include/armadillo/armadillo_bits/fn_chol.hpp index c65ef6221..dfd9e6e49 100644 --- a/Include/armadillo/armadillo_bits/fn_chol.hpp +++ b/Include/armadillo/armadillo_bits/fn_chol.hpp @@ -95,7 +95,7 @@ chol out = X.get_ref(); - arma_debug_check( (out.is_square() == false), "chol(): given matrix must be square sized" ); + arma_debug_check( (out.is_square() == false), "chol(): given matrix must be square sized", [&](){ out.soft_reset(); } ); if(out.is_empty()) { diff --git a/Include/armadillo/armadillo_bits/fn_cond.hpp b/Include/armadillo/armadillo_bits/fn_cond_rcond.hpp similarity index 95% rename from Include/armadillo/armadillo_bits/fn_cond.hpp rename to Include/armadillo/armadillo_bits/fn_cond_rcond.hpp index d5f96e1a2..fae0a06ad 100644 --- a/Include/armadillo/armadillo_bits/fn_cond.hpp +++ b/Include/armadillo/armadillo_bits/fn_cond_rcond.hpp @@ -29,7 +29,7 @@ cond(const Base& X) { arma_extra_debug_sigprint(); - return op_cond::cond(X.get_ref()); + return op_cond::apply(X.get_ref()); } @@ -42,7 +42,7 @@ rcond(const Base& X) { arma_extra_debug_sigprint(); - return op_cond::rcond(X.get_ref()); + return op_rcond::apply(X.get_ref()); } diff --git a/Include/armadillo/armadillo_bits/fn_eye.hpp b/Include/armadillo/armadillo_bits/fn_eye.hpp index 703a0a572..4252ffaa7 100644 --- a/Include/armadillo/armadillo_bits/fn_eye.hpp +++ b/Include/armadillo/armadillo_bits/fn_eye.hpp @@ -54,15 +54,8 @@ eye(const uword n_rows, const uword n_cols, const typename arma_Mat_Col_Row_only arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_Col::value) - { - arma_debug_check( (n_cols != 1), "eye(): incompatible size" ); - } - else - if(is_Row::value) - { - arma_debug_check( (n_rows != 1), "eye(): incompatible size" ); - } + if(is_Col::value) { arma_debug_check( (n_cols != 1), "eye(): incompatible size" ); } + if(is_Row::value) { arma_debug_check( (n_rows != 1), "eye(): incompatible size" ); } return Gen(n_rows, n_cols); } @@ -92,15 +85,8 @@ eye(const uword n_rows, const uword n_cols, const typename arma_SpMat_SpCol_SpRo arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_SpCol::value) - { - arma_debug_check( (n_cols != 1), "eye(): incompatible size" ); - } - else - if(is_SpRow::value) - { - arma_debug_check( (n_rows != 1), "eye(): incompatible size" ); - } + if(is_SpCol::value) { arma_debug_check( (n_cols != 1), "eye(): incompatible size" ); } + if(is_SpRow::value) { arma_debug_check( (n_rows != 1), "eye(): incompatible size" ); } obj_type out; diff --git a/Include/armadillo/armadillo_bits/fn_find.hpp b/Include/armadillo/armadillo_bits/fn_find.hpp index f86cc7555..5efb254b9 100644 --- a/Include/armadillo/armadillo_bits/fn_find.hpp +++ b/Include/armadillo/armadillo_bits/fn_find.hpp @@ -251,6 +251,24 @@ find_nonfinite(const T1& X) +template +arma_warn_unused +inline +typename +enable_if2 + < + is_arma_type::value, + const mtOp + >::result +find_nan(const T1& X) + { + arma_extra_debug_sigprint(); + + return mtOp(X); + } + + + // @@ -293,6 +311,25 @@ find_nonfinite(const BaseCube& X) +template +arma_warn_unused +inline +uvec +find_nan(const BaseCube& X) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + const unwrap_cube tmp(X.get_ref()); + + const Mat R( const_cast< eT* >(tmp.M.memptr()), tmp.M.n_elem, 1, false ); + + return find_nan(R); + } + + + // @@ -385,4 +422,48 @@ find_nonfinite(const SpBase& X) +template +arma_warn_unused +inline +Col +find_nan(const SpBase& X) + { + arma_extra_debug_sigprint(); + + const SpProxy P(X.get_ref()); + + const uword n_rows = P.get_n_rows(); + const uword n_nz = P.get_n_nonzero(); + + Mat tmp(n_nz, 1, arma_nozeros_indicator()); + + uword* tmp_mem = tmp.memptr(); + + typename SpProxy::const_iterator_type it = P.begin(); + + uword count = 0; + + for(uword i=0; i out; + + if(count > 0) { out.steal_mem_col(tmp, count); } + + return out; + } + + + //! @} diff --git a/Include/armadillo/armadillo_bits/fn_inv.hpp b/Include/armadillo/armadillo_bits/fn_inv.hpp index cb603a44b..65589f742 100644 --- a/Include/armadillo/armadillo_bits/fn_inv.hpp +++ b/Include/armadillo/armadillo_bits/fn_inv.hpp @@ -115,10 +115,17 @@ inv { arma_extra_debug_sigprint(); - const bool status = op_inv_gen_rcond::apply_direct(out_inv, out_rcond, X.get_ref()); + typedef typename T1::pod_type T; + + op_inv_gen_state inv_state; + + const bool status = op_inv_gen_rcond::apply_direct(out_inv, inv_state, X.get_ref()); + + out_rcond = inv_state.rcond; if(status == false) { + out_rcond = T(0); out_inv.soft_reset(); arma_debug_warn_level(3, "inv(): matrix is singular"); } diff --git a/Include/armadillo/armadillo_bits/fn_inv_sympd.hpp b/Include/armadillo/armadillo_bits/fn_inv_sympd.hpp index ecec03009..ffd1d0d8a 100644 --- a/Include/armadillo/armadillo_bits/fn_inv_sympd.hpp +++ b/Include/armadillo/armadillo_bits/fn_inv_sympd.hpp @@ -115,10 +115,17 @@ inv_sympd { arma_extra_debug_sigprint(); - const bool status = op_inv_spd_rcond::apply_direct(out_inv, out_rcond, X.get_ref()); + typedef typename T1::pod_type T; + + op_inv_spd_state inv_state; + + const bool status = op_inv_spd_rcond::apply_direct(out_inv, inv_state, X.get_ref()); + + out_rcond = inv_state.rcond; if(status == false) { + out_rcond = T(0); out_inv.soft_reset(); arma_debug_warn_level(3, "inv_sympd(): matrix is singular or not positive definite"); } diff --git a/Include/armadillo/armadillo_bits/fn_norm.hpp b/Include/armadillo/armadillo_bits/fn_norm.hpp index 37ccac0ca..3dbcdb0fc 100644 --- a/Include/armadillo/armadillo_bits/fn_norm.hpp +++ b/Include/armadillo/armadillo_bits/fn_norm.hpp @@ -185,6 +185,20 @@ norm typedef typename T1::elem_type eT; typedef typename T1::pod_type T; + if(is_SpSubview_col::value) + { + const SpSubview_col& sv = reinterpret_cast< const SpSubview_col& >(expr); + + if(sv.n_rows == sv.m.n_rows) + { + const SpMat& m = sv.m; + const uword col = sv.aux_col1; + const eT* mem = &(m.values[ m.col_ptrs[col] ]); + + return spop_norm::vec_norm_k(mem, sv.n_nonzero, k); + } + } + const unwrap_spmat U(expr); const SpMat& X = U.M; @@ -194,17 +208,7 @@ norm if(is_vec) { - // create a fake dense vector to allow reuse of code for dense vectors - Col fake_vector( access::rwp(X.values), X.n_nonzero, false ); - - const Proxy< Col > P_fake_vector(fake_vector); - - if(k == uword(1)) { return op_norm::vec_norm_1(P_fake_vector); } - if(k == uword(2)) { return op_norm::vec_norm_2(P_fake_vector); } - - arma_debug_check( (k == 0), "norm(): k must be greater than zero" ); - - return op_norm::vec_norm_k(P_fake_vector, int(k)); + return spop_norm::vec_norm_k(X.values, X.n_nonzero, k); } else { diff --git a/Include/armadillo/armadillo_bits/fn_ones.hpp b/Include/armadillo/armadillo_bits/fn_ones.hpp index cacadd9f8..ae8b62200 100644 --- a/Include/armadillo/armadillo_bits/fn_ones.hpp +++ b/Include/armadillo/armadillo_bits/fn_ones.hpp @@ -43,14 +43,10 @@ ones(const uword n_elem, const arma_empty_class junk1 = arma_empty_class(), cons arma_ignore(junk1); arma_ignore(junk2); - if(is_Row::value) - { - return Gen(1, n_elem); - } - else - { - return Gen(n_elem, 1); - } + const uword n_rows = (is_Row::value) ? uword(1) : n_elem; + const uword n_cols = (is_Row::value) ? n_elem : uword(1); + + return Gen(n_rows, n_cols); } @@ -88,15 +84,8 @@ ones(const uword n_rows, const uword n_cols, const typename arma_Mat_Col_Row_onl arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_Col::value) - { - arma_debug_check( (n_cols != 1), "ones(): incompatible size" ); - } - else - if(is_Row::value) - { - arma_debug_check( (n_rows != 1), "ones(): incompatible size" ); - } + if(is_Col::value) { arma_debug_check( (n_cols != 1), "ones(): incompatible size" ); } + if(is_Row::value) { arma_debug_check( (n_rows != 1), "ones(): incompatible size" ); } return Gen(n_rows, n_cols); } diff --git a/Include/armadillo/armadillo_bits/fn_pinv.hpp b/Include/armadillo/armadillo_bits/fn_pinv.hpp index 4bd4ac4fc..6a8732273 100644 --- a/Include/armadillo/armadillo_bits/fn_pinv.hpp +++ b/Include/armadillo/armadillo_bits/fn_pinv.hpp @@ -21,6 +21,22 @@ +template +arma_warn_unused +inline +typename enable_if2< is_real::value, const Op >::result +pinv + ( + const Base& X + ) + { + arma_extra_debug_sigprint(); + + return Op(X.get_ref()); + } + + + template arma_warn_unused inline @@ -28,7 +44,7 @@ typename enable_if2< is_real::value, const Op& X, - const typename T1::pod_type tol = 0.0, + const typename T1::pod_type tol, const char* method = nullptr ) { diff --git a/Include/armadillo/armadillo_bits/fn_powext.hpp b/Include/armadillo/armadillo_bits/fn_powext.hpp new file mode 100644 index 000000000..a971219d6 --- /dev/null +++ b/Include/armadillo/armadillo_bits/fn_powext.hpp @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + +//! \addtogroup fn_powext +//! @{ + + + +template +arma_warn_unused +arma_inline +typename +enable_if2 + < + is_arma_type::value, + const Glue + >::result +pow + ( + const T1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + return Glue(X, Y.get_ref()); + } + + + +template +arma_warn_unused +inline +Mat +pow + ( + const subview_each1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + return glue_powext::apply(X,Y); + } + + + +template +arma_warn_unused +arma_inline +const GlueCube +pow + ( + const BaseCube& X, + const BaseCube& Y + ) + { + arma_extra_debug_sigprint(); + + return GlueCube(X.get_ref(), Y.get_ref()); + } + + + +template +arma_warn_unused +inline +Cube +pow + ( + const subview_cube_each1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + return glue_powext::apply(X,Y); + } + + + +// + + + +template +arma_warn_unused +arma_inline +typename +enable_if2 + < + ( is_arma_type::value && is_cx::yes ), + const mtGlue + >::result +pow + ( + const T1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + return mtGlue(X, Y.get_ref()); + } + + + +template +arma_warn_unused +inline +typename +enable_if2 + < + is_cx::yes, + Mat + >::result +pow + ( + const subview_each1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + return glue_powext_cx::apply(X,Y); + } + + + +template +arma_warn_unused +arma_inline +const mtGlueCube +pow + ( + const BaseCube< std::complex, T1>& X, + const BaseCube< typename T1::pod_type , T2>& Y + ) + { + arma_extra_debug_sigprint(); + + return mtGlueCube(X.get_ref(), Y.get_ref()); + } + + + +template +arma_warn_unused +inline +Cube< std::complex > +pow + ( + const subview_cube_each1< std::complex >& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + return glue_powext_cx::apply(X,Y); + } + + + +//! @} diff --git a/Include/armadillo/armadillo_bits/fn_randg.hpp b/Include/armadillo/armadillo_bits/fn_randg.hpp index 400bda348..637994cd6 100644 --- a/Include/armadillo/armadillo_bits/fn_randg.hpp +++ b/Include/armadillo/armadillo_bits/fn_randg.hpp @@ -42,29 +42,14 @@ randg(const uword n_rows, const uword n_cols, const distr_param& param = distr_p arma_debug_check( (n_rows != 1), "randg(): incompatible size" ); } - obj_type out(n_rows, n_cols, arma_nozeros_indicator()); + double a = double(1); + double b = double(1); - double a; - double b; + param.get_double_vals(a,b); - if(param.state == 0) - { - a = double(1); - b = double(1); - } - else - if(param.state == 1) - { - a = double(param.a_int); - b = double(param.b_int); - } - else - { - a = param.a_double; - b = param.b_double; - } + arma_debug_check( ((a <= double(0)) || (b <= double(0))), "randg(): incorrect distribution parameters; a and b must be greater than zero" ); - arma_debug_check( ((a <= double(0)) || (b <= double(0))), "randg(): a and b must be greater than zero" ); + obj_type out(n_rows, n_cols, arma_nozeros_indicator()); arma_rng::randg::fill(out.memptr(), out.n_elem, a, b); @@ -175,29 +160,14 @@ randg(const uword n_rows, const uword n_cols, const uword n_slices, const distr_ typedef typename cube_type::elem_type eT; - cube_type out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); + double a = double(1); + double b = double(1); - double a; - double b; + param.get_double_vals(a,b); - if(param.state == 0) - { - a = double(1); - b = double(1); - } - else - if(param.state == 1) - { - a = double(param.a_int); - b = double(param.b_int); - } - else - { - a = param.a_double; - b = param.b_double; - } + arma_debug_check( ((a <= double(0)) || (b <= double(0))), "randg(): incorrect distribution parameters; a and b must be greater than zero" ); - arma_debug_check( ((a <= double(0)) || (b <= double(0))), "randg(): a and b must be greater than zero" ); + cube_type out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); arma_rng::randg::fill(out.memptr(), out.n_elem, a, b); diff --git a/Include/armadillo/armadillo_bits/fn_randi.hpp b/Include/armadillo/armadillo_bits/fn_randi.hpp index 91b24c263..5bfb9bf30 100644 --- a/Include/armadillo/armadillo_bits/fn_randi.hpp +++ b/Include/armadillo/armadillo_bits/fn_randi.hpp @@ -42,29 +42,14 @@ randi(const uword n_rows, const uword n_cols, const distr_param& param = distr_p arma_debug_check( (n_rows != 1), "randi(): incompatible size" ); } - obj_type out(n_rows, n_cols, arma_nozeros_indicator()); + int a = 0; + int b = arma_rng::randi::max_val(); - int a; - int b; + param.get_int_vals(a,b); - if(param.state == 0) - { - a = 0; - b = arma_rng::randi::max_val(); - } - else - if(param.state == 1) - { - a = param.a_int; - b = param.b_int; - } - else - { - a = int(param.a_double); - b = int(param.b_double); - } + arma_debug_check( (a > b), "randi(): incorrect distribution parameters; a must be less than b" ); - arma_debug_check( (a > b), "randi(): incorrect distribution parameters: a must be less than b" ); + obj_type out(n_rows, n_cols, arma_nozeros_indicator()); arma_rng::randi::fill(out.memptr(), out.n_elem, a, b); @@ -198,29 +183,14 @@ randi(const uword n_rows, const uword n_cols, const uword n_slices, const distr_ typedef typename cube_type::elem_type eT; - cube_type out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); + int a = 0; + int b = arma_rng::randi::max_val(); - int a; - int b; + param.get_int_vals(a,b); - if(param.state == 0) - { - a = 0; - b = arma_rng::randi::max_val(); - } - else - if(param.state == 1) - { - a = param.a_int; - b = param.b_int; - } - else - { - a = int(param.a_double); - b = int(param.b_double); - } + arma_debug_check( (a > b), "randi(): incorrect distribution parameters; a must be less than b" ); - arma_debug_check( (a > b), "randi(): incorrect distribution parameters: a must be less than b" ); + cube_type out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); arma_rng::randi::fill(out.memptr(), out.n_elem, a, b); diff --git a/Include/armadillo/armadillo_bits/fn_randn.hpp b/Include/armadillo/armadillo_bits/fn_randn.hpp index 63ca69d5a..ca868f4a1 100644 --- a/Include/armadillo/armadillo_bits/fn_randn.hpp +++ b/Include/armadillo/armadillo_bits/fn_randn.hpp @@ -21,6 +21,8 @@ +// scalars + arma_warn_unused inline double @@ -42,148 +44,308 @@ randn() -//! Generate a vector with all elements set to random values with a gaussian distribution (zero mean, unit variance) arma_warn_unused -arma_inline -const Gen -randn(const uword n_elem) +inline +double +randn(const distr_param& param) { arma_extra_debug_sigprint(); - return Gen(n_elem, 1); + if(param.state == 0) { return double(arma_rng::randn()); } + + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + const double val = double(arma_rng::randn()); + + return ((val * sd) + mu); + } + + + +template +arma_warn_unused +inline +typename arma_real_or_cx_only::result +randn(const distr_param& param) + { + arma_extra_debug_sigprint(); + + if(param.state == 0) { return eT(arma_rng::randn()); } + + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + eT val = eT(0); + + arma_rng::randn::fill(&val, 1, mu, sd); // using fill() as eT can be complex + + return val; + } + + + +// vectors + +arma_warn_unused +inline +vec +randn(const uword n_elem, const distr_param& param = distr_param()) + { + arma_extra_debug_sigprint(); + + vec out(n_elem, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randn::fill(out.memptr(), n_elem); + } + else + { + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + arma_rng::randn::fill(out.memptr(), n_elem, mu, sd); + } + + return out; } template arma_warn_unused -arma_inline -const Gen -randn(const uword n_elem, const arma_empty_class junk1 = arma_empty_class(), const typename arma_Mat_Col_Row_only::result* junk2 = nullptr) +inline +obj_type +randn(const uword n_elem, const distr_param& param = distr_param(), const typename arma_Mat_Col_Row_only::result* junk = nullptr) { arma_extra_debug_sigprint(); - arma_ignore(junk1); - arma_ignore(junk2); + arma_ignore(junk); + + typedef typename obj_type::elem_type eT; const uword n_rows = (is_Row::value) ? uword(1) : n_elem; const uword n_cols = (is_Row::value) ? n_elem : uword(1); - return Gen(n_rows, n_cols); + obj_type out(n_rows, n_cols, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randn::fill(out.memptr(), out.n_elem); + } + else + { + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + arma_rng::randn::fill(out.memptr(), out.n_elem, mu, sd); + } + + return out; } -//! Generate a dense matrix with all elements set to random values with a gaussian distribution (zero mean, unit variance) +// matrices + arma_warn_unused -arma_inline -const Gen -randn(const uword n_rows, const uword n_cols) +inline +mat +randn(const uword n_rows, const uword n_cols, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return Gen(n_rows, n_cols); + mat out(n_rows, n_cols, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randn::fill(out.memptr(), out.n_elem); + } + else + { + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + arma_rng::randn::fill(out.memptr(), out.n_elem, mu, sd); + } + + return out; } arma_warn_unused -arma_inline -const Gen -randn(const SizeMat& s) +inline +mat +randn(const SizeMat& s, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return Gen(s.n_rows, s.n_cols); + return randn(s.n_rows, s.n_cols, param); } template arma_warn_unused -arma_inline -const Gen -randn(const uword n_rows, const uword n_cols, const typename arma_Mat_Col_Row_only::result* junk = nullptr) +inline +obj_type +randn(const uword n_rows, const uword n_cols, const distr_param& param = distr_param(), const typename arma_Mat_Col_Row_only::result* junk = nullptr) { arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_Col::value) + typedef typename obj_type::elem_type eT; + + if(is_Col::value) { arma_debug_check( (n_cols != 1), "randn(): incompatible size" ); } + if(is_Row::value) { arma_debug_check( (n_rows != 1), "randn(): incompatible size" ); } + + obj_type out(n_rows, n_cols, arma_nozeros_indicator()); + + if(param.state == 0) { - arma_debug_check( (n_cols != 1), "randn(): incompatible size" ); + arma_rng::randn::fill(out.memptr(), out.n_elem); } else - if(is_Row::value) { - arma_debug_check( (n_rows != 1), "randn(): incompatible size" ); + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + arma_rng::randn::fill(out.memptr(), out.n_elem, mu, sd); } - return Gen(n_rows, n_cols); + return out; } template arma_warn_unused -arma_inline -const Gen -randn(const SizeMat& s, const typename arma_Mat_Col_Row_only::result* junk = nullptr) +inline +obj_type +randn(const SizeMat& s, const distr_param& param = distr_param(), const typename arma_Mat_Col_Row_only::result* junk = nullptr) { arma_extra_debug_sigprint(); arma_ignore(junk); - return randn(s.n_rows, s.n_cols); + return randn(s.n_rows, s.n_cols, param); } +// cubes + + arma_warn_unused -arma_inline -const GenCube -randn(const uword n_rows, const uword n_cols, const uword n_slices) +inline +cube +randn(const uword n_rows, const uword n_cols, const uword n_slices, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return GenCube(n_rows, n_cols, n_slices); + cube out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randn::fill(out.memptr(), out.n_elem); + } + else + { + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + arma_rng::randn::fill(out.memptr(), out.n_elem, mu, sd); + } + + return out; } arma_warn_unused -arma_inline -const GenCube -randn(const SizeCube& s) +inline +cube +randn(const SizeCube& s, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return GenCube(s.n_rows, s.n_cols, s.n_slices); + return randn(s.n_rows, s.n_cols, s.n_slices, param); } template arma_warn_unused -arma_inline -const GenCube -randn(const uword n_rows, const uword n_cols, const uword n_slices, const typename arma_Cube_only::result* junk = nullptr) +inline +cube_type +randn(const uword n_rows, const uword n_cols, const uword n_slices, const distr_param& param = distr_param(), const typename arma_Cube_only::result* junk = nullptr) { - arma_extra_debug_sigprint(); + arma_extra_debug_sigprint(); arma_ignore(junk); - return GenCube(n_rows, n_cols, n_slices); + typedef typename cube_type::elem_type eT; + + cube_type out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randn::fill(out.memptr(), out.n_elem); + } + else + { + double mu = double(0); + double sd = double(1); + + param.get_double_vals(mu,sd); + + arma_debug_check( (sd <= double(0)), "randn(): incorrect distribution parameters; standard deviation must be > 0" ); + + arma_rng::randn::fill(out.memptr(), out.n_elem, mu, sd); + } + + return out; } template arma_warn_unused -arma_inline -const GenCube -randn(const SizeCube& s, const typename arma_Cube_only::result* junk = nullptr) +inline +cube_type +randn(const SizeCube& s, const distr_param& param = distr_param(), const typename arma_Cube_only::result* junk = nullptr) { - arma_extra_debug_sigprint(); + arma_extra_debug_sigprint(); arma_ignore(junk); - return GenCube(s.n_rows, s.n_cols, s.n_slices); + return randn(s.n_rows, s.n_cols, s.n_slices, param); } diff --git a/Include/armadillo/armadillo_bits/fn_randu.hpp b/Include/armadillo/armadillo_bits/fn_randu.hpp index 75d024c75..d5a0d383d 100644 --- a/Include/armadillo/armadillo_bits/fn_randu.hpp +++ b/Include/armadillo/armadillo_bits/fn_randu.hpp @@ -21,6 +21,8 @@ +// scalars + arma_warn_unused inline double @@ -42,148 +44,308 @@ randu() -//! Generate a vector with all elements set to random values in the [0,1] interval (uniform distribution) arma_warn_unused -arma_inline -const Gen -randu(const uword n_elem) +inline +double +randu(const distr_param& param) + { + arma_extra_debug_sigprint(); + + if(param.state == 0) { return double(arma_rng::randu()); } + + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + const double val = double(arma_rng::randu()); + + return ((val * (b - a)) + a); + } + + + +template +arma_warn_unused +inline +typename arma_real_or_cx_only::result +randu(const distr_param& param) + { + arma_extra_debug_sigprint(); + + if(param.state == 0) { return eT(arma_rng::randu()); } + + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + eT val = eT(0); + + arma_rng::randu::fill(&val, 1, a, b); // using fill() as eT can be complex + + return val; + } + + + +// vectors + +arma_warn_unused +inline +vec +randu(const uword n_elem, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return Gen(n_elem, 1); + vec out(n_elem, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randu::fill(out.memptr(), n_elem); + } + else + { + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + arma_rng::randu::fill(out.memptr(), n_elem, a, b); + } + + return out; } template arma_warn_unused -arma_inline -const Gen -randu(const uword n_elem, const arma_empty_class junk1 = arma_empty_class(), const typename arma_Mat_Col_Row_only::result* junk2 = nullptr) +inline +obj_type +randu(const uword n_elem, const distr_param& param = distr_param(), const typename arma_Mat_Col_Row_only::result* junk = nullptr) { arma_extra_debug_sigprint(); - arma_ignore(junk1); - arma_ignore(junk2); + arma_ignore(junk); + + typedef typename obj_type::elem_type eT; const uword n_rows = (is_Row::value) ? uword(1) : n_elem; const uword n_cols = (is_Row::value) ? n_elem : uword(1); - return Gen(n_rows, n_cols); + obj_type out(n_rows, n_cols, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randu::fill(out.memptr(), out.n_elem); + } + else + { + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + arma_rng::randu::fill(out.memptr(), out.n_elem, a, b); + } + + return out; } -//! Generate a dense matrix with all elements set to random values in the [0,1] interval (uniform distribution) +// matrices + arma_warn_unused -arma_inline -const Gen -randu(const uword n_rows, const uword n_cols) +inline +mat +randu(const uword n_rows, const uword n_cols, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return Gen(n_rows, n_cols); + mat out(n_rows, n_cols, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randu::fill(out.memptr(), out.n_elem); + } + else + { + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + arma_rng::randu::fill(out.memptr(), out.n_elem, a, b); + } + + return out; } arma_warn_unused -arma_inline -const Gen -randu(const SizeMat& s) +inline +mat +randu(const SizeMat& s, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return Gen(s.n_rows, s.n_cols); + return randu(s.n_rows, s.n_cols, param); } template arma_warn_unused -arma_inline -const Gen -randu(const uword n_rows, const uword n_cols, const typename arma_Mat_Col_Row_only::result* junk = nullptr) +inline +obj_type +randu(const uword n_rows, const uword n_cols, const distr_param& param = distr_param(), const typename arma_Mat_Col_Row_only::result* junk = nullptr) { arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_Col::value) + typedef typename obj_type::elem_type eT; + + if(is_Col::value) { arma_debug_check( (n_cols != 1), "randu(): incompatible size" ); } + if(is_Row::value) { arma_debug_check( (n_rows != 1), "randu(): incompatible size" ); } + + obj_type out(n_rows, n_cols, arma_nozeros_indicator()); + + if(param.state == 0) { - arma_debug_check( (n_cols != 1), "randu(): incompatible size" ); + arma_rng::randu::fill(out.memptr(), out.n_elem); } else - if(is_Row::value) { - arma_debug_check( (n_rows != 1), "randu(): incompatible size" ); + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + arma_rng::randu::fill(out.memptr(), out.n_elem, a, b); } - return Gen(n_rows, n_cols); + return out; } template arma_warn_unused -arma_inline -const Gen -randu(const SizeMat& s, const typename arma_Mat_Col_Row_only::result* junk = nullptr) +inline +obj_type +randu(const SizeMat& s, const distr_param& param = distr_param(), const typename arma_Mat_Col_Row_only::result* junk = nullptr) { arma_extra_debug_sigprint(); arma_ignore(junk); - return randu(s.n_rows, s.n_cols); + return randu(s.n_rows, s.n_cols, param); } +// cubes + + arma_warn_unused -arma_inline -const GenCube -randu(const uword n_rows, const uword n_cols, const uword n_slices) +inline +cube +randu(const uword n_rows, const uword n_cols, const uword n_slices, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return GenCube(n_rows, n_cols, n_slices); + cube out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randu::fill(out.memptr(), out.n_elem); + } + else + { + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + arma_rng::randu::fill(out.memptr(), out.n_elem, a, b); + } + + return out; } arma_warn_unused -arma_inline -const GenCube -randu(const SizeCube& s) +inline +cube +randu(const SizeCube& s, const distr_param& param = distr_param()) { arma_extra_debug_sigprint(); - return GenCube(s.n_rows, s.n_cols, s.n_slices); + return randu(s.n_rows, s.n_cols, s.n_slices, param); } template arma_warn_unused -arma_inline -const GenCube -randu(const uword n_rows, const uword n_cols, const uword n_slices, const typename arma_Cube_only::result* junk = nullptr) +inline +cube_type +randu(const uword n_rows, const uword n_cols, const uword n_slices, const distr_param& param = distr_param(), const typename arma_Cube_only::result* junk = nullptr) { arma_extra_debug_sigprint(); arma_ignore(junk); - return GenCube(n_rows, n_cols, n_slices); + typedef typename cube_type::elem_type eT; + + cube_type out(n_rows, n_cols, n_slices, arma_nozeros_indicator()); + + if(param.state == 0) + { + arma_rng::randu::fill(out.memptr(), out.n_elem); + } + else + { + double a = double(0); + double b = double(1); + + param.get_double_vals(a,b); + + arma_debug_check( (a >= b), "randu(): incorrect distribution parameters; a must be less than b" ); + + arma_rng::randu::fill(out.memptr(), out.n_elem, a, b); + } + + return out; } template arma_warn_unused -arma_inline -const GenCube -randu(const SizeCube& s, const typename arma_Cube_only::result* junk = nullptr) +inline +cube_type +randu(const SizeCube& s, const distr_param& param = distr_param(), const typename arma_Cube_only::result* junk = nullptr) { arma_extra_debug_sigprint(); arma_ignore(junk); - return GenCube(s.n_rows, s.n_cols, s.n_slices); + return randu(s.n_rows, s.n_cols, s.n_slices, param); } diff --git a/Include/armadillo/armadillo_bits/fn_speye.hpp b/Include/armadillo/armadillo_bits/fn_speye.hpp index a282ec137..48570be23 100644 --- a/Include/armadillo/armadillo_bits/fn_speye.hpp +++ b/Include/armadillo/armadillo_bits/fn_speye.hpp @@ -31,15 +31,8 @@ speye(const uword n_rows, const uword n_cols, const typename arma_SpMat_SpCol_Sp arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_SpCol::value) - { - arma_debug_check( (n_cols != 1), "speye(): incompatible size" ); - } - else - if(is_SpRow::value) - { - arma_debug_check( (n_rows != 1), "speye(): incompatible size" ); - } + if(is_SpCol::value) { arma_debug_check( (n_cols != 1), "speye(): incompatible size" ); } + if(is_SpRow::value) { arma_debug_check( (n_rows != 1), "speye(): incompatible size" ); } obj_type out; diff --git a/Include/armadillo/armadillo_bits/fn_spsolve.hpp b/Include/armadillo/armadillo_bits/fn_spsolve.hpp index ee04a870b..7b71347da 100644 --- a/Include/armadillo/armadillo_bits/fn_spsolve.hpp +++ b/Include/armadillo/armadillo_bits/fn_spsolve.hpp @@ -114,7 +114,7 @@ spsolve_helper arma_debug_warn_level(2, "spsolve(): system is singular (rcond: ", rcond, ")"); } - if( (status == true) && (rcond > T(0)) && (rcond < auxlib::epsilon_lapack(out)) ) + if( (status == true) && (rcond > T(0)) && (rcond < std::numeric_limits::epsilon()) ) { arma_debug_warn_level(2, "solve(): solution computed, but system is singular to working precision (rcond: ", rcond, ")"); } diff --git a/Include/armadillo/armadillo_bits/fn_trace.hpp b/Include/armadillo/armadillo_bits/fn_trace.hpp index 5d6dc5a69..8a15bac21 100644 --- a/Include/armadillo/armadillo_bits/fn_trace.hpp +++ b/Include/armadillo/armadillo_bits/fn_trace.hpp @@ -102,10 +102,7 @@ trace(const Glue& X) arma_debug_assert_trans_mul_size< partial_unwrap::do_trans, partial_unwrap::do_trans >(A.n_rows, A.n_cols, B.n_rows, B.n_cols, "matrix multiplication"); - if( (A.n_elem == 0) || (B.n_elem == 0) ) - { - return eT(0); - } + if( (A.n_elem == 0) || (B.n_elem == 0) ) { return eT(0); } const uword A_n_rows = A.n_rows; const uword A_n_cols = A.n_cols; @@ -224,10 +221,7 @@ trace(const Glue& X) arma_debug_assert_trans_mul_size< partial_unwrap::do_trans, partial_unwrap::do_trans >(A.n_rows, A.n_cols, B.n_rows, B.n_cols, "matrix multiplication"); - if( (A.n_elem == 0) || (B.n_elem == 0) ) - { - return eT(0); - } + if( (A.n_elem == 0) || (B.n_elem == 0) ) { return eT(0); } const uword A_n_rows = A.n_rows; const uword A_n_cols = A.n_cols; @@ -516,10 +510,7 @@ trace(const SpGlue& expr) arma_debug_assert_mul_size(A.n_rows, A.n_cols, B.n_rows, B.n_cols, "matrix multiplication"); - if( (A.n_nonzero == 0) || (B.n_nonzero == 0) ) - { - return eT(0); - } + if( (A.n_nonzero == 0) || (B.n_nonzero == 0) ) { return eT(0); } const uword N = (std::min)(A.n_rows, B.n_cols); @@ -576,10 +567,7 @@ trace(const SpGlue, T2, spglue_times>& expr) // NOTE: deliberately swapped A.n_rows and A.n_cols to take into account the requested transpose operation arma_debug_assert_mul_size(A.n_cols, A.n_rows, B.n_rows, B.n_cols, "matrix multiplication"); - if( (A.n_nonzero == 0) || (B.n_nonzero == 0) ) - { - return eT(0); - } + if( (A.n_nonzero == 0) || (B.n_nonzero == 0) ) { return eT(0); } const uword N = (std::min)(A.n_cols, B.n_cols); @@ -635,10 +623,7 @@ trace(const SpGlue, T2, spglue_times>& expr) // NOTE: deliberately swapped A.n_rows and A.n_cols to take into account the requested transpose operation arma_debug_assert_mul_size(A.n_cols, A.n_rows, B.n_rows, B.n_cols, "matrix multiplication"); - if( (A.n_nonzero == 0) || (B.n_nonzero == 0) ) - { - return eT(0); - } + if( (A.n_nonzero == 0) || (B.n_nonzero == 0) ) { return eT(0); } const uword N = (std::min)(A.n_cols, B.n_cols); diff --git a/Include/armadillo/armadillo_bits/fn_zeros.hpp b/Include/armadillo/armadillo_bits/fn_zeros.hpp index cdb47d718..5f0692236 100644 --- a/Include/armadillo/armadillo_bits/fn_zeros.hpp +++ b/Include/armadillo/armadillo_bits/fn_zeros.hpp @@ -43,14 +43,10 @@ zeros(const uword n_elem, const arma_empty_class junk1 = arma_empty_class(), con arma_ignore(junk1); arma_ignore(junk2); - if(is_Row::value) - { - return Gen(1, n_elem); - } - else - { - return Gen(n_elem, 1); - } + const uword n_rows = (is_Row::value) ? uword(1) : n_elem; + const uword n_cols = (is_Row::value) ? n_elem : uword(1); + + return Gen(n_rows, n_cols); } @@ -88,15 +84,8 @@ zeros(const uword n_rows, const uword n_cols, const typename arma_Mat_Col_Row_on arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_Col::value) - { - arma_debug_check( (n_cols != 1), "zeros(): incompatible size" ); - } - else - if(is_Row::value) - { - arma_debug_check( (n_rows != 1), "zeros(): incompatible size" ); - } + if(is_Col::value) { arma_debug_check( (n_cols != 1), "zeros(): incompatible size" ); } + if(is_Row::value) { arma_debug_check( (n_rows != 1), "zeros(): incompatible size" ); } return Gen(n_rows, n_cols); } @@ -178,15 +167,8 @@ zeros(const uword n_rows, const uword n_cols, const typename arma_SpMat_SpCol_Sp arma_extra_debug_sigprint(); arma_ignore(junk); - if(is_SpCol::value) - { - arma_debug_check( (n_cols != 1), "zeros(): incompatible size" ); - } - else - if(is_SpRow::value) - { - arma_debug_check( (n_rows != 1), "zeros(): incompatible size" ); - } + if(is_SpCol::value) { arma_debug_check( (n_cols != 1), "zeros(): incompatible size" ); } + if(is_SpRow::value) { arma_debug_check( (n_rows != 1), "zeros(): incompatible size" ); } return sp_obj_type(n_rows, n_cols); } diff --git a/Include/armadillo/armadillo_bits/glue_kron_meat.hpp b/Include/armadillo/armadillo_bits/glue_kron_meat.hpp index f12aa53da..c7c4ff639 100644 --- a/Include/armadillo/armadillo_bits/glue_kron_meat.hpp +++ b/Include/armadillo/armadillo_bits/glue_kron_meat.hpp @@ -125,24 +125,21 @@ glue_kron::apply(Mat& out, const Glue& typedef typename T1::elem_type eT; - const unwrap A_tmp(X.A); - const unwrap B_tmp(X.B); + const quasi_unwrap UA(X.A); + const quasi_unwrap UB(X.B); - const Mat& A = A_tmp.M; - const Mat& B = B_tmp.M; - - if( (&out != &A) && (&out != &B) ) - { - glue_kron::direct_kron(out, A, B); - } - else + if(UA.is_alias(out) || UB.is_alias(out)) { Mat tmp; - glue_kron::direct_kron(tmp, A, B); + glue_kron::direct_kron(tmp, UA.M, UB.M); out.steal_mem(tmp); } + else + { + glue_kron::direct_kron(out, UA.M, UB.M); + } } diff --git a/Include/armadillo/armadillo_bits/glue_powext_bones.hpp b/Include/armadillo/armadillo_bits/glue_powext_bones.hpp new file mode 100644 index 000000000..d5698c5ca --- /dev/null +++ b/Include/armadillo/armadillo_bits/glue_powext_bones.hpp @@ -0,0 +1,70 @@ + +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + + +//! \addtogroup glue_powext +//! @{ + + + +class glue_powext + : public traits_glue_or + { + public: + + template inline static void apply(Mat& out, const Glue& X); + + template inline static void apply(Mat& out, const Mat& A, const Mat& B); + + template inline static Mat apply(const subview_each1& X, const Base& Y); + + // + + template inline static void apply(Cube& out, const GlueCube& X); + + template inline static void apply(Cube& out, const Cube& A, const Cube& B); + + template inline static Cube apply(const subview_cube_each1& X, const Base& Y); + }; + + + +class glue_powext_cx + : public traits_glue_or + { + public: + + template inline static void apply(Mat& out, const mtGlue& X); + + template inline static void apply(Mat< std::complex >& out, const Mat< std::complex >& A, const Mat& B); + + template inline static Mat apply(const subview_each1& X, const Base& Y); + + // + + template inline static void apply(Cube& out, const mtGlueCube& X); + + template inline static void apply(Cube< std::complex >& out, const Cube< std::complex >& A, const Cube& B); + + template inline static Cube< std::complex > apply(const subview_cube_each1< std::complex >& X, const Base& Y); + }; + + + +//! @} diff --git a/Include/armadillo/armadillo_bits/glue_powext_meat.hpp b/Include/armadillo/armadillo_bits/glue_powext_meat.hpp new file mode 100644 index 000000000..700a2cfd7 --- /dev/null +++ b/Include/armadillo/armadillo_bits/glue_powext_meat.hpp @@ -0,0 +1,674 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + + +//! \addtogroup glue_powext +//! @{ + + +template +inline +void +glue_powext::apply(Mat& out, const Glue& X) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + const quasi_unwrap UA(X.A); + const quasi_unwrap UB(X.B); + + const Mat& A = UA.M; + const Mat& B = UB.M; + + arma_debug_assert_same_size(A, B, "element-wise pow()"); + + const bool UA_bad_alias = UA.is_alias(out) && (UA.has_subview); // allow inplace operation + const bool UB_bad_alias = UB.is_alias(out); + + if(UA_bad_alias || UB_bad_alias) + { + Mat tmp; + + glue_powext::apply(tmp, A, B); + + out.steal_mem(tmp); + } + else + { + glue_powext::apply(out, A, B); + } + } + + + +template +inline +void +glue_powext::apply(Mat& out, const Mat& A, const Mat& B) + { + arma_extra_debug_sigprint(); + + out.set_size(A.n_rows, A.n_cols); + + const uword N = out.n_elem; + + eT* out_mem = out.memptr(); + const eT* A_mem = A.memptr(); + const eT* B_mem = B.memptr(); + + if( arma_config::openmp && mp_gate::eval(N) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = mp_thread_limit::get(); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i +inline +Mat +glue_powext::apply + ( + const subview_each1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + typedef typename parent::elem_type eT; + + const parent& A = X.P; + + const uword A_n_rows = A.n_rows; + const uword A_n_cols = A.n_cols; + + Mat out(A_n_rows, A_n_cols, arma_nozeros_indicator()); + + const quasi_unwrap tmp(Y.get_ref()); + const Mat& B = tmp.M; + + X.check_size(B); + + const eT* B_mem = B.memptr(); + + if(mode == 0) // each column + { + if( arma_config::openmp && mp_gate::eval(A.n_elem) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = int( (std::min)(uword(mp_thread_limit::get()), A_n_cols) ); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = eop_aux::pow(A_mem[row], B_mem[row]); + } + } + } + #endif + } + else + { + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = eop_aux::pow(A_mem[row], B_mem[row]); + } + } + } + } + + if(mode == 1) // each row + { + if( arma_config::openmp && mp_gate::eval(A.n_elem) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = int( (std::min)(uword(mp_thread_limit::get()), A_n_cols) ); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + const eT B_val = B_mem[i]; + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = eop_aux::pow(A_mem[row], B_val); + } + } + } + #endif + } + else + { + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + const eT B_val = B_mem[i]; + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = eop_aux::pow(A_mem[row], B_val); + } + } + } + } + + return out; + } + + + +template +inline +void +glue_powext::apply(Cube& out, const GlueCube& X) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + const unwrap_cube UA(X.A); + const unwrap_cube UB(X.B); + + const Cube& A = UA.M; + const Cube& B = UB.M; + + arma_debug_assert_same_size(A, B, "element-wise pow()"); + + if(UB.is_alias(out)) + { + Cube tmp; + + glue_powext::apply(tmp, A, B); + + out.steal_mem(tmp); + } + else + { + glue_powext::apply(out, A, B); + } + } + + + +template +inline +void +glue_powext::apply(Cube& out, const Cube& A, const Cube& B) + { + arma_extra_debug_sigprint(); + + out.set_size(A.n_rows, A.n_cols, A.n_slices); + + const uword N = out.n_elem; + + eT* out_mem = out.memptr(); + const eT* A_mem = A.memptr(); + const eT* B_mem = B.memptr(); + + if( arma_config::openmp && mp_gate::eval(N) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = mp_thread_limit::get(); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i +inline +Cube +glue_powext::apply + ( + const subview_cube_each1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + const Cube& A = X.P; + + const uword A_n_rows = A.n_rows; + const uword A_n_cols = A.n_cols; + const uword A_n_slices = A.n_slices; + + Cube out(A_n_rows, A_n_cols, A_n_slices, arma_nozeros_indicator()); + + const quasi_unwrap tmp(Y.get_ref()); + const Mat& B = tmp.M; + + X.check_size(B); + + const eT* B_mem = B.memptr(); + const uword B_n_elem = B.n_elem; + + if( arma_config::openmp && mp_gate::eval(A.n_elem) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = int( (std::min)(uword(mp_thread_limit::get()), A_n_slices) ); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword s=0; s < A_n_slices; ++s) + { + const eT* A_slice_mem = A.slice_memptr(s); + eT* out_slice_mem = out.slice_memptr(s); + + for(uword i=0; i < B_n_elem; ++i) + { + out_slice_mem[i] = eop_aux::pow(A_slice_mem[i], B_mem[i]); + } + } + } + #endif + } + else + { + for(uword s=0; s < A_n_slices; ++s) + { + const eT* A_slice_mem = A.slice_memptr(s); + eT* out_slice_mem = out.slice_memptr(s); + + for(uword i=0; i < B_n_elem; ++i) + { + out_slice_mem[i] = eop_aux::pow(A_slice_mem[i], B_mem[i]); + } + } + } + + return out; + } + + + +// + + + +template +inline +void +glue_powext_cx::apply(Mat& out, const mtGlue& X) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + typedef typename T1::pod_type T; + + const quasi_unwrap UA(X.A); + const quasi_unwrap UB(X.B); + + const Mat& A = UA.M; + const Mat< T>& B = UB.M; + + arma_debug_assert_same_size(A, B, "element-wise pow()"); + + if(UA.is_alias(out) && (UA.has_subview)) + { + Mat tmp; + + glue_powext_cx::apply(tmp, A, B); + + out.steal_mem(tmp); + } + else + { + glue_powext_cx::apply(out, A, B); + } + } + + + +template +inline +void +glue_powext_cx::apply(Mat< std::complex >& out, const Mat< std::complex >& A, const Mat& B) + { + arma_extra_debug_sigprint(); + + typedef typename std::complex eT; + + out.set_size(A.n_rows, A.n_cols); + + const uword N = out.n_elem; + + eT* out_mem = out.memptr(); + const eT* A_mem = A.memptr(); + const T* B_mem = B.memptr(); + + if( arma_config::openmp && mp_gate::eval(N) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = mp_thread_limit::get(); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i +inline +Mat +glue_powext_cx::apply + ( + const subview_each1& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + typedef typename parent::elem_type eT; + typedef typename parent::pod_type T; + + const parent& A = X.P; + + const uword A_n_rows = A.n_rows; + const uword A_n_cols = A.n_cols; + + Mat out(A_n_rows, A_n_cols, arma_nozeros_indicator()); + + const quasi_unwrap tmp(Y.get_ref()); + const Mat& B = tmp.M; + + X.check_size(B); + + const T* B_mem = B.memptr(); + + if(mode == 0) // each column + { + if( arma_config::openmp && mp_gate::eval(A.n_elem) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = int( (std::min)(uword(mp_thread_limit::get()), A_n_cols) ); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = std::pow(A_mem[row], B_mem[row]); + } + } + } + #endif + } + else + { + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = std::pow(A_mem[row], B_mem[row]); + } + } + } + } + + if(mode == 1) // each row + { + if( arma_config::openmp && mp_gate::eval(A.n_elem) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = int( (std::min)(uword(mp_thread_limit::get()), A_n_cols) ); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + const eT B_val = B_mem[i]; + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = std::pow(A_mem[row], B_val); + } + } + } + #endif + } + else + { + for(uword i=0; i < A_n_cols; ++i) + { + const eT* A_mem = A.colptr(i); + eT* out_mem = out.colptr(i); + + const eT B_val = B_mem[i]; + + for(uword row=0; row < A_n_rows; ++row) + { + out_mem[row] = std::pow(A_mem[row], B_val); + } + } + } + } + + return out; + } + + + +template +inline +void +glue_powext_cx::apply(Cube& out, const mtGlueCube& X) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + + typedef typename get_pod_type::result T; + + const unwrap_cube UA(X.A); + const unwrap_cube UB(X.B); + + const Cube& A = UA.M; + const Cube< T>& B = UB.M; + + arma_debug_assert_same_size(A, B, "element-wise pow()"); + + glue_powext_cx::apply(out, A, B); + } + + + +template +inline +void +glue_powext_cx::apply(Cube< std::complex >& out, const Cube< std::complex >& A, const Cube& B) + { + arma_extra_debug_sigprint(); + + typedef typename std::complex eT; + + out.set_size(A.n_rows, A.n_cols, A.n_slices); + + const uword N = out.n_elem; + + eT* out_mem = out.memptr(); + const eT* A_mem = A.memptr(); + const T* B_mem = B.memptr(); + + if( arma_config::openmp && mp_gate::eval(N) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = mp_thread_limit::get(); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword i=0; i +inline +Cube< std::complex > +glue_powext_cx::apply + ( + const subview_cube_each1< std::complex >& X, + const Base& Y + ) + { + arma_extra_debug_sigprint(); + + typedef typename std::complex eT; + + const Cube& A = X.P; + + const uword A_n_rows = A.n_rows; + const uword A_n_cols = A.n_cols; + const uword A_n_slices = A.n_slices; + + Cube out(A_n_rows, A_n_cols, A_n_slices, arma_nozeros_indicator()); + + const quasi_unwrap tmp(Y.get_ref()); + const Mat& B = tmp.M; + + X.check_size(B); + + const T* B_mem = B.memptr(); + const uword B_n_elem = B.n_elem; + + if( arma_config::openmp && mp_gate::eval(A.n_elem) ) + { + #if defined(ARMA_USE_OPENMP) + { + const int n_threads = int( (std::min)(uword(mp_thread_limit::get()), A_n_slices) ); + + #pragma omp parallel for schedule(static) num_threads(n_threads) + for(uword s=0; s < A_n_slices; ++s) + { + const eT* A_slice_mem = A.slice_memptr(s); + eT* out_slice_mem = out.slice_memptr(s); + + for(uword i=0; i < B_n_elem; ++i) + { + out_slice_mem[i] = std::pow(A_slice_mem[i], B_mem[i]); + } + } + } + #endif + } + else + { + for(uword s=0; s < A_n_slices; ++s) + { + const eT* A_slice_mem = A.slice_memptr(s); + eT* out_slice_mem = out.slice_memptr(s); + + for(uword i=0; i < B_n_elem; ++i) + { + out_slice_mem[i] = std::pow(A_slice_mem[i], B_mem[i]); + } + } + } + + return out; + } + + + +//! @} diff --git a/Include/armadillo/armadillo_bits/glue_solve_meat.hpp b/Include/armadillo/armadillo_bits/glue_solve_meat.hpp index 95f2f01ab..974112c79 100644 --- a/Include/armadillo/armadillo_bits/glue_solve_meat.hpp +++ b/Include/armadillo/armadillo_bits/glue_solve_meat.hpp @@ -80,7 +80,7 @@ glue_solve_gen_full::apply(Mat& out, const Glue inline bool -glue_solve_gen_full::apply(Mat& out, const Base& A_expr, const Base& B_expr, const uword flags) +glue_solve_gen_full::apply(Mat& actual_out, const Base& A_expr, const Base& B_expr, const uword flags) { arma_extra_debug_sigprint(); @@ -100,22 +100,25 @@ glue_solve_gen_full::apply(Mat& out, const Base& A_expr, const Base A = A_expr.get_ref(); @@ -130,9 +133,25 @@ glue_solve_gen_full::apply(Mat& out, const Base& A_expr, const Base::value && is_Mat::value) + { + const quasi_unwrap UA( A_expr.get_ref() ); + const quasi_unwrap UB( B_expr.get_ref() ); + + is_alias = UA.is_alias(actual_out) || UB.is_alias(actual_out); + } + + Mat tmp; + Mat& out = (is_alias) ? tmp : actual_out; + T rcond = T(0); bool status = false; @@ -195,6 +214,7 @@ glue_solve_gen_full::apply(Mat& out, const Base& A_expr, const Base& out, const Base& A_expr, const Base& out, const Base& A_expr, const Base& out, const Base& A_expr, const Base& out, const Base& A_expr, const Base T(0)) && (rcond < auxlib::epsilon_lapack(A)) ) - { - arma_debug_warn_level(2, "solve(): solution computed, but system is singular to working precision (rcond: ", rcond, ")"); - } - - - if( (status == false) && (no_approx == false) ) - { - arma_extra_debug_print("glue_solve_gen_full::apply(): solving rank deficient system"); - - if(rcond > T(0)) - { - arma_debug_warn_level(2, "solve(): system is singular (rcond: ", rcond, "); attempting approx solution"); - } - else - { - arma_debug_warn_level(2, "solve(): system is singular; attempting approx solution"); - } - - // TODO: conditionally recreate A: have a separate state flag which indicates whether A was previously overwritten - - A = A_expr.get_ref(); // as A may have been overwritten - - status = auxlib::solve_approx_svd(out, A, B_expr.get_ref()); // A is overwritten - } } else { @@ -328,33 +322,38 @@ glue_solve_gen_full::apply(Mat& out, const Base& A_expr, const Base T(0)) && (rcond < auxlib::epsilon_lapack(A)) ) + } + + + if( (status == true) && (fast == false) && (allow_ugly == false) && ((rcond < std::numeric_limits::epsilon()) || arma_isnan(rcond)) ) + { + status = false; + } + + + if( (status == false) && (no_approx == false) ) + { + arma_extra_debug_print("glue_solve_gen_full::apply(): solving rank deficient system"); + + if(rcond == T(0)) { - arma_debug_warn_level(2, "solve(): solution computed, but system is singular to working precision (rcond: ", rcond, ")"); + arma_debug_warn_level(2, "solve(): system is singular; attempting approx solution"); } - - if( (status == false) && (no_approx == false) ) + else { - arma_extra_debug_print("glue_solve_gen_full::apply(): solving rank deficient system"); - - if(rcond > T(0)) - { - arma_debug_warn_level(2, "solve(): system is singular (rcond: ", rcond, "); attempting approx solution"); - } - else - { - arma_debug_warn_level(2, "solve(): system is singular; attempting approx solution"); - } - - A = A_expr.get_ref(); // as A was overwritten - - status = auxlib::solve_approx_svd(out, A, B_expr.get_ref()); // A is overwritten + arma_debug_warn_level(2, "solve(): system is singular (rcond: ", rcond, "); attempting approx solution"); } + + // TODO: conditionally recreate A: have a separate state flag which indicates whether A was previously overwritten + + A = A_expr.get_ref(); // as A may have been overwritten + + status = auxlib::solve_approx_svd(out, A, B_expr.get_ref()); // A is overwritten } + if(is_alias) { actual_out.steal_mem(out); } return status; } @@ -392,22 +391,29 @@ glue_solve_tri_default::apply(Mat& actual_out, const Base& A_expr, co typedef typename get_pod_type::result T; - const bool triu = bool(flags & solve_opts::flag_triu); - const bool tril = bool(flags & solve_opts::flag_tril); - const bool allow_ugly = false; + const bool triu = bool(flags & solve_opts::flag_triu); + const bool tril = bool(flags & solve_opts::flag_tril); arma_extra_debug_print("glue_solve_tri_default::apply(): enabled flags:"); if(triu) { arma_extra_debug_print("triu"); } if(tril) { arma_extra_debug_print("tril"); } - const quasi_unwrap U(A_expr.get_ref()); - const Mat& A = U.M; + const quasi_unwrap UA(A_expr.get_ref()); + const Mat& A = UA.M; arma_debug_check( (A.is_square() == false), "solve(): matrix marked as triangular must be square sized" ); - const uword layout = (triu) ? uword(0) : uword(1); - const bool is_alias = U.is_alias(actual_out); + const uword layout = (triu) ? uword(0) : uword(1); + + bool is_alias = true; + + if(is_Mat::value) + { + const quasi_unwrap UB(B_expr.get_ref()); + + is_alias = UA.is_alias(actual_out) || UB.is_alias(actual_out); + } T rcond = T(0); bool status = false; @@ -415,11 +421,12 @@ glue_solve_tri_default::apply(Mat& actual_out, const Base& A_expr, co Mat tmp; Mat& out = (is_alias) ? tmp : actual_out; - status = auxlib::solve_trimat_rcond(out, rcond, A, B_expr.get_ref(), layout, allow_ugly); // A is not modified + status = auxlib::solve_trimat_rcond(out, rcond, A, B_expr.get_ref(), layout); // A is not modified + - if( (status == true) && (rcond > T(0)) && (rcond < auxlib::epsilon_lapack(A)) ) + if( (status == true) && ( (rcond < std::numeric_limits::epsilon()) || arma_isnan(rcond) ) ) { - arma_debug_warn_level(2, "solve(): solution computed, but system is singular to working precision (rcond: ", rcond, ")"); + status = false; } @@ -427,13 +434,13 @@ glue_solve_tri_default::apply(Mat& actual_out, const Base& A_expr, co { arma_extra_debug_print("glue_solve_tri_default::apply(): solving rank deficient system"); - if(rcond > T(0)) + if(rcond == T(0)) { - arma_debug_warn_level(2, "solve(): system is singular (rcond: ", rcond, "); attempting approx solution"); + arma_debug_warn_level(2, "solve(): system is singular; attempting approx solution"); } else { - arma_debug_warn_level(2, "solve(): system is singular; attempting approx solution"); + arma_debug_warn_level(2, "solve(): system is singular (rcond: ", rcond, "); attempting approx solution"); } Mat triA = (triu) ? trimatu(A) : trimatl(A); // trimatu() and trimatl() return the same type @@ -513,13 +520,21 @@ glue_solve_tri_full::apply(Mat& actual_out, const Base& A_expr, const if(likely_sympd) { arma_debug_warn_level(2, "solve(): option 'likely_sympd' ignored for triangular matrix"); } - const quasi_unwrap U(A_expr.get_ref()); - const Mat& A = U.M; + const quasi_unwrap UA(A_expr.get_ref()); + const Mat& A = UA.M; arma_debug_check( (A.is_square() == false), "solve(): matrix marked as triangular must be square sized" ); - const uword layout = (triu) ? uword(0) : uword(1); - const bool is_alias = U.is_alias(actual_out); + const uword layout = (triu) ? uword(0) : uword(1); + + bool is_alias = true; + + if(is_Mat::value) + { + const quasi_unwrap UB(B_expr.get_ref()); + + is_alias = UA.is_alias(actual_out) || UB.is_alias(actual_out); + } T rcond = T(0); bool status = false; @@ -533,12 +548,13 @@ glue_solve_tri_full::apply(Mat& actual_out, const Base& A_expr, const } else { - status = auxlib::solve_trimat_rcond(out, rcond, A, B_expr.get_ref(), layout, allow_ugly); // A is not modified + status = auxlib::solve_trimat_rcond(out, rcond, A, B_expr.get_ref(), layout); // A is not modified } - if( (status == true) && (rcond > T(0)) && (rcond < auxlib::epsilon_lapack(A)) ) + + if( (status == true) && (fast == false) && (allow_ugly == false) && ((rcond < std::numeric_limits::epsilon()) || arma_isnan(rcond)) ) { - arma_debug_warn_level(2, "solve(): solution computed, but system is singular to working precision (rcond: ", rcond, ")"); + status = false; } @@ -546,13 +562,13 @@ glue_solve_tri_full::apply(Mat& actual_out, const Base& A_expr, const { arma_extra_debug_print("glue_solve_tri_full::apply(): solving rank deficient system"); - if(rcond > T(0)) + if(rcond == T(0)) { - arma_debug_warn_level(2, "solve(): system is singular (rcond: ", rcond, "); attempting approx solution"); + arma_debug_warn_level(2, "solve(): system is singular; attempting approx solution"); } else { - arma_debug_warn_level(2, "solve(): system is singular; attempting approx solution"); + arma_debug_warn_level(2, "solve(): system is singular (rcond: ", rcond, "); attempting approx solution"); } Mat triA = (triu) ? trimatu(A) : trimatl(A); // trimatu() and trimatl() return the same type diff --git a/Include/armadillo/armadillo_bits/gmm_diag_meat.hpp b/Include/armadillo/armadillo_bits/gmm_diag_meat.hpp index 3ceee50e8..41c94cbcd 100644 --- a/Include/armadillo/armadillo_bits/gmm_diag_meat.hpp +++ b/Include/armadillo/armadillo_bits/gmm_diag_meat.hpp @@ -2159,6 +2159,10 @@ gmm_diag::km_iterate(const Mat& X, const uword max_iter, const bool verb } #else { + acc_hefts.zeros(); + acc_means.zeros(); + last_indx.zeros(); + uword* acc_hefts_mem = acc_hefts.memptr(); uword* last_indx_mem = last_indx.memptr(); diff --git a/Include/armadillo/armadillo_bits/gmm_full_meat.hpp b/Include/armadillo/armadillo_bits/gmm_full_meat.hpp index 4106801e0..3e3abc5c3 100644 --- a/Include/armadillo/armadillo_bits/gmm_full_meat.hpp +++ b/Include/armadillo/armadillo_bits/gmm_full_meat.hpp @@ -2188,6 +2188,10 @@ gmm_full::km_iterate(const Mat& X, const uword max_iter, const bool verb } #else { + acc_hefts.zeros(); + acc_means.zeros(); + last_indx.zeros(); + uword* acc_hefts_mem = acc_hefts.memptr(); uword* last_indx_mem = last_indx.memptr(); diff --git a/Include/armadillo/armadillo_bits/include_hdf5.hpp b/Include/armadillo/armadillo_bits/include_hdf5.hpp index a367c7f83..8b5b5884d 100644 --- a/Include/armadillo/armadillo_bits/include_hdf5.hpp +++ b/Include/armadillo/armadillo_bits/include_hdf5.hpp @@ -22,23 +22,48 @@ #define H5_USE_110_API #if !defined(ARMA_HDF5_INCLUDE_DIR) - #include + #if defined(__has_include) + #if __has_include() + #include + #else + #undef ARMA_USE_HDF5 + #undef ARMA_USE_HDF5_CMAKE + #pragma message ("WARNING: use of HDF5 disabled; hdf5.h header not found") + #endif + #else + #include + #endif #else + #undef ARMA_STR1 + #undef ARMA_STR2 + #undef ARMA_HDF5_HEADER + #define ARMA_STR1(x) x #define ARMA_STR2(x) ARMA_STR1(x) #define ARMA_HDF5_HEADER ARMA_STR2(ARMA_HDF5_INCLUDE_DIR)ARMA_STR2(hdf5.h) - #include ARMA_INCFILE_WRAP(ARMA_HDF5_HEADER) + #if defined(__has_include) + #if __has_include(ARMA_INCFILE_WRAP(ARMA_HDF5_HEADER)) + #include ARMA_INCFILE_WRAP(ARMA_HDF5_HEADER) + #else + #undef ARMA_USE_HDF5 + #undef ARMA_USE_HDF5_CMAKE + #pragma message ("WARNING: use of HDF5 disabled; hdf5.h header not found") + #endif + #else + #include ARMA_INCFILE_WRAP(ARMA_HDF5_HEADER) + #endif #undef ARMA_STR1 #undef ARMA_STR2 #undef ARMA_HDF5_HEADER #endif - - #if defined(H5_USE_16_API_DEFAULT) || defined(H5_USE_16_API) - #pragma message ("WARNING: disabling use of HDF5 due to its incompatible configuration") + + #if defined(H5_USE_16_API) || defined(H5_USE_16_API_DEFAULT) + #pragma message ("WARNING: use of HDF5 disabled; incompatible configuration: H5_USE_16_API or H5_USE_16_API_DEFAULT") #undef ARMA_USE_HDF5 - #undef ARMA_USE_HDF5_ALT + #undef ARMA_USE_HDF5_CMAKE #endif + #endif diff --git a/Include/armadillo/armadillo_bits/include_superlu.hpp b/Include/armadillo/armadillo_bits/include_superlu.hpp index 1ec40617e..43fa0a732 100644 --- a/Include/armadillo/armadillo_bits/include_superlu.hpp +++ b/Include/armadillo/armadillo_bits/include_superlu.hpp @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: Apache-2.0 +// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause // // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) // Copyright 2008-2016 National ICT Australia (NICTA) @@ -54,8 +54,7 @@ #if defined(ARMA_USE_SUPERLU) - -#if defined(ARMA_USE_SUPERLU_HEADERS) || defined(ARMA_SUPERLU_INCLUDE_DIR) +#undef ARMA_SLU_HEADERS_FOUND // Since we need to suport float, double, cx_float and cx_double, // as well as preserve the sanity of the user, @@ -70,142 +69,173 @@ namespace arma { - namespace superlu { // slu_*defs.h has int typedefed to int_t. // I'll just write it as int for simplicity, where I can, but supermatrix.h needs int_t. typedef int int_t; - + } +} + +#if defined(ARMA_USE_SUPERLU_HEADERS) || defined(ARMA_SUPERLU_INCLUDE_DIR) + +namespace arma +{ +namespace superlu + { // Include supermatrix.h. This gives us SuperMatrix. // Put it in the slu namespace. // For versions of SuperLU I am familiar with, supermatrix.h does not include any other files. // Therefore, putting it in the superlu namespace is reasonably safe. // This same reasoning is true for superlu_enum_consts.h. + #undef ARMA_SLU_HEADER_A + #undef ARMA_SLU_HEADER_B + #if defined(ARMA_SUPERLU_INCLUDE_DIR) - #define ARMA_SLU_STR(x) x - #define ARMA_SLU_STR2(x) ARMA_SLU_STR(x) + #undef ARMA_SLU_STR1 + #undef ARMA_SLU_STR2 - #define ARMA_SLU_SUPERMATRIX_H ARMA_SLU_STR2(ARMA_SUPERLU_INCLUDE_DIR)ARMA_SLU_STR2(supermatrix.h) - #define ARMA_SLU_SUPERLU_ENUM_CONSTS_H ARMA_SLU_STR2(ARMA_SUPERLU_INCLUDE_DIR)ARMA_SLU_STR2(superlu_enum_consts.h) + #define ARMA_SLU_STR1(x) x + #define ARMA_SLU_STR2(x) ARMA_SLU_STR1(x) + + #define ARMA_SLU_HEADER_A ARMA_SLU_STR2(ARMA_SUPERLU_INCLUDE_DIR)ARMA_SLU_STR2(supermatrix.h) + #define ARMA_SLU_HEADER_B ARMA_SLU_STR2(ARMA_SUPERLU_INCLUDE_DIR)ARMA_SLU_STR2(superlu_enum_consts.h) #else - #define ARMA_SLU_SUPERMATRIX_H supermatrix.h - #define ARMA_SLU_SUPERLU_ENUM_CONSTS_H superlu_enum_consts.h + #define ARMA_SLU_HEADER_A supermatrix.h + #define ARMA_SLU_HEADER_B superlu_enum_consts.h #endif - #include ARMA_INCFILE_WRAP(ARMA_SLU_SUPERMATRIX_H) - #include ARMA_INCFILE_WRAP(ARMA_SLU_SUPERLU_ENUM_CONSTS_H) - - #undef ARMA_SLU_SUPERMATRIX_H - #undef ARMA_SLU_SUPERLU_ENUM_CONSTS_H - - - typedef struct - { - int* panel_histo; - double* utime; - float* ops; - int TinyPivots; - int RefineSteps; - int expansions; - } SuperLUStat_t; - - - typedef struct - { - fact_t Fact; - yes_no_t Equil; - colperm_t ColPerm; - trans_t Trans; - IterRefine_t IterRefine; - double DiagPivotThresh; - yes_no_t SymmetricMode; - yes_no_t PivotGrowth; - yes_no_t ConditionNumber; - rowperm_t RowPerm; - int ILU_DropRule; - double ILU_DropTol; - double ILU_FillFactor; - norm_t ILU_Norm; - double ILU_FillTol; - milu_t ILU_MILU; - double ILU_MILU_Dim; - yes_no_t ParSymbFact; - yes_no_t ReplaceTinyPivot; - yes_no_t SolveInitialized; - yes_no_t RefineInitialized; - yes_no_t PrintStat; - int nnzL, nnzU; - int num_lookaheads; - yes_no_t lookahead_etree; - yes_no_t SymPattern; - } superlu_options_t; - - - typedef struct - { - float for_lu; - float total_needed; - } mem_usage_t; - - - typedef struct e_node - { - int size; - void* mem; - } ExpHeader; - - - typedef struct - { - int size; - int used; - int top1; - int top2; - void* array; - } LU_stack_t; + #if defined(__has_include) + #if __has_include(ARMA_INCFILE_WRAP(ARMA_SLU_HEADER_A)) && __has_include(ARMA_INCFILE_WRAP(ARMA_SLU_HEADER_B)) + #include ARMA_INCFILE_WRAP(ARMA_SLU_HEADER_A) + #include ARMA_INCFILE_WRAP(ARMA_SLU_HEADER_B) + #define ARMA_SLU_HEADERS_FOUND + #endif + #else + #include ARMA_INCFILE_WRAP(ARMA_SLU_HEADER_A) + #include ARMA_INCFILE_WRAP(ARMA_SLU_HEADER_B) + #define ARMA_SLU_HEADERS_FOUND + #endif + #undef ARMA_SLU_STR1 + #undef ARMA_SLU_STR2 + + #undef ARMA_SLU_HEADER_A + #undef ARMA_SLU_HEADER_B - typedef struct - { - int* xsup; - int* supno; - int* lsub; - int* xlsub; - void* lusup; - int* xlusup; - void* ucol; - int* usub; - int* xusub; - int nzlmax; - int nzumax; - int nzlumax; - int n; - LU_space_t MemModel; - int num_expansions; - ExpHeader* expanders; - LU_stack_t stack; - } GlobalLU_t; + #if defined(ARMA_SLU_HEADERS_FOUND) + + typedef struct + { + int* panel_histo; + double* utime; + float* ops; + int TinyPivots; + int RefineSteps; + int expansions; + } SuperLUStat_t; + + typedef struct + { + fact_t Fact; + yes_no_t Equil; + colperm_t ColPerm; + trans_t Trans; + IterRefine_t IterRefine; + double DiagPivotThresh; + yes_no_t SymmetricMode; + yes_no_t PivotGrowth; + yes_no_t ConditionNumber; + rowperm_t RowPerm; + int ILU_DropRule; + double ILU_DropTol; + double ILU_FillFactor; + norm_t ILU_Norm; + double ILU_FillTol; + milu_t ILU_MILU; + double ILU_MILU_Dim; + yes_no_t ParSymbFact; + yes_no_t ReplaceTinyPivot; + yes_no_t SolveInitialized; + yes_no_t RefineInitialized; + yes_no_t PrintStat; + int nnzL, nnzU; + int num_lookaheads; + yes_no_t lookahead_etree; + yes_no_t SymPattern; + } superlu_options_t; + + typedef struct + { + float for_lu; + float total_needed; + } mem_usage_t; + + typedef struct e_node + { + int size; + void* mem; + } ExpHeader; + + typedef struct + { + int size; + int used; + int top1; + int top2; + void* array; + } LU_stack_t; + + typedef struct + { + int* xsup; + int* supno; + int* lsub; + int* xlsub; + void* lusup; + int* xlusup; + void* ucol; + int* usub; + int* xusub; + int nzlmax; + int nzumax; + int nzlumax; + int n; + LU_space_t MemModel; + int num_expansions; + ExpHeader* expanders; + LU_stack_t stack; + } GlobalLU_t; + + #endif } } -#else +#endif + +#if defined(ARMA_USE_SUPERLU_HEADERS) && !defined(ARMA_SLU_HEADERS_FOUND) + #undef ARMA_USE_SUPERLU + #pragma message ("WARNING: use of SuperLU disabled; required headers not found") +#endif + +#endif + + + +#if defined(ARMA_USE_SUPERLU) && !defined(ARMA_SLU_HEADERS_FOUND) // Not using any SuperLU headers, so define all required enums and structs. -// -// CAVEAT: -// This code requires SuperLU version 5.2, -// and assumes that newer 5.x versions will have no API changes. + +#if defined(ARMA_SUPERLU_INCLUDE_DIR) + #pragma message ("WARNING: SuperLU headers not found; using built-in definitions") +#endif namespace arma { - namespace superlu { - typedef int int_t; - typedef enum { SLU_NC, @@ -218,7 +248,6 @@ namespace superlu SLU_NR_loc } Stype_t; - typedef enum { SLU_S, @@ -227,7 +256,6 @@ namespace superlu SLU_Z } Dtype_t; - typedef enum { SLU_GE, @@ -241,7 +269,6 @@ namespace superlu SLU_HEU } Mtype_t; - typedef struct { Stype_t Stype; @@ -252,7 +279,6 @@ namespace superlu void* Store; } SuperMatrix; - typedef struct { int* panel_histo; @@ -263,7 +289,6 @@ namespace superlu int expansions; } SuperLUStat_t; - typedef enum {NO, YES} yes_no_t; typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t; typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t; @@ -275,7 +300,6 @@ namespace superlu typedef enum {ONE_NORM, TWO_NORM, INF_NORM} norm_t; typedef enum {SILU, SMILU_1, SMILU_2, SMILU_3} milu_t; - typedef struct { fact_t Fact; @@ -306,14 +330,12 @@ namespace superlu yes_no_t SymPattern; } superlu_options_t; - typedef struct { float for_lu; float total_needed; } mem_usage_t; - typedef struct { int_t nnz; @@ -322,21 +344,18 @@ namespace superlu int_t* colptr; } NCformat; - typedef struct { int_t lda; void* nzval; } DNformat; - typedef struct e_node { int size; void* mem; } ExpHeader; - typedef struct { int size; @@ -346,7 +365,6 @@ namespace superlu void* array; } LU_stack_t; - typedef struct { int* xsup; @@ -370,7 +388,6 @@ namespace superlu } } -#endif - +#undef ARMA_SLU_HEADERS_FOUND #endif diff --git a/Include/armadillo/armadillo_bits/injector_meat.hpp b/Include/armadillo/armadillo_bits/injector_meat.hpp index 76048533d..d509896da 100644 --- a/Include/armadillo/armadillo_bits/injector_meat.hpp +++ b/Include/armadillo/armadillo_bits/injector_meat.hpp @@ -274,6 +274,7 @@ mat_injector::end_of_row() const template arma_cold +inline const mat_injector& operator<<(const mat_injector& ref, const typename mat_injector::elem_type val) { @@ -288,6 +289,7 @@ operator<<(const mat_injector& ref, const typename mat_injector::elem_ty template arma_cold +inline const mat_injector& operator<<(const mat_injector& ref, const injector_end_of_row<>& x) { @@ -579,6 +581,7 @@ field_injector::end_of_row() const template arma_cold +inline const field_injector& operator<<(const field_injector& ref, const typename field_injector::object_type& val) { @@ -593,6 +596,7 @@ operator<<(const field_injector& ref, const typename field_injector::obj template arma_cold +inline const field_injector& operator<<(const field_injector& ref, const injector_end_of_row<>& x) { diff --git a/Include/armadillo/armadillo_bits/memory.hpp b/Include/armadillo/armadillo_bits/memory.hpp index 8e992235f..a85c503aa 100644 --- a/Include/armadillo/armadillo_bits/memory.hpp +++ b/Include/armadillo/armadillo_bits/memory.hpp @@ -77,6 +77,8 @@ memory::acquire(const uword n_elem) } #elif defined(_MSC_VER) { + // Windoze is too primitive to handle C++17 std::aligned_alloc() + //out_memptr = (eT *) malloc(sizeof(eT)*n_elem); //out_memptr = (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 ); // lives in malloc.h diff --git a/Include/armadillo/armadillo_bits/newarp_SparseGenRealShiftSolve_meat.hpp b/Include/armadillo/armadillo_bits/newarp_SparseGenRealShiftSolve_meat.hpp index 5d85517b0..ea2061867 100644 --- a/Include/armadillo/armadillo_bits/newarp_SparseGenRealShiftSolve_meat.hpp +++ b/Include/armadillo/armadillo_bits/newarp_SparseGenRealShiftSolve_meat.hpp @@ -28,6 +28,9 @@ SparseGenRealShiftSolve::SparseGenRealShiftSolve(const SpMat& mat_obj, c , perm_r(mat_obj.n_rows + 1) , n_rows(mat_obj.n_rows) , n_cols(mat_obj.n_cols) + #else + : n_rows(0) + , n_cols(0) #endif { arma_extra_debug_sigprint(); @@ -74,8 +77,8 @@ SparseGenRealShiftSolve::SparseGenRealShiftSolve(const SpMat& mat_obj, c if( (x_rcond < std::numeric_limits::epsilon()) || arma_isnan(x_rcond) ) { - if(x_rcond > eT(0)) { arma_debug_warn_level(2, "matrix is singular to working precision (rcond: ", x_rcond, ")"); } - else { arma_debug_warn_level(2, "matrix is singular to working precision"); } + if(x_rcond == eT(0)) { arma_debug_warn_level(2, "matrix is singular to working precision"); } + else { arma_debug_warn_level(2, "matrix is singular to working precision (rcond: ", x_rcond, ")"); } return; } diff --git a/Include/armadillo/armadillo_bits/op_chol_meat.hpp b/Include/armadillo/armadillo_bits/op_chol_meat.hpp index 9bda648ab..ebc6448b0 100644 --- a/Include/armadillo/armadillo_bits/op_chol_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_chol_meat.hpp @@ -50,7 +50,7 @@ op_chol::apply_direct(Mat& out, const Base static inline typename T1::pod_type cond(const Base& X); - template static inline typename T1::pod_type rcond(const Base& X); + template static inline typename T1::pod_type apply(const Base& X); + + template static inline typename get_pod_type::result apply_diag(const Mat& A); + template static inline typename get_pod_type::result apply_sym ( Mat& A); + template static inline typename get_pod_type::result apply_gen ( Mat& A); }; diff --git a/Include/armadillo/armadillo_bits/op_cond_meat.hpp b/Include/armadillo/armadillo_bits/op_cond_meat.hpp index 242a9b34c..1d59f0333 100644 --- a/Include/armadillo/armadillo_bits/op_cond_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_cond_meat.hpp @@ -24,122 +24,149 @@ template inline typename T1::pod_type -op_cond::cond(const Base& X) +op_cond::apply(const Base& X) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; typedef typename T1::pod_type T; - // TODO: implement speed up for symmetric matrices, similar to op_pinv::apply_sym() - Mat A(X.get_ref()); - Col S; + if(A.n_elem == 0) { return T(0); } - const bool status = auxlib::svd_dc(S, A); + if(is_op_diagmat::value || A.is_diagmat()) + { + arma_extra_debug_print("op_cond::apply(): detected diagonal matrix"); + + return op_cond::apply_diag(A); + } - if(status == false) + bool is_approx_sym = false; + bool is_approx_sympd = false; + + sympd_helper::analyse_matrix(is_approx_sym, is_approx_sympd, A); + + const bool do_sym = (is_cx::no) ? (is_approx_sym) : (is_approx_sym && is_approx_sympd); + + if(do_sym) { - arma_debug_warn_level(3, "cond(): svd failed"); + arma_extra_debug_print("op_cond: symmetric/hermitian optimisation"); - return Datum::nan; + return op_cond::apply_sym(A); } - return (S.n_elem > 0) ? T( max(S) / min(S) ) : T(0); + return op_cond::apply_gen(A); } -template +template inline -typename T1::pod_type -op_cond::rcond(const Base& X) +typename get_pod_type::result +op_cond::apply_diag(const Mat& A) { arma_extra_debug_sigprint(); - typedef typename T1::elem_type eT; - typedef typename T1::pod_type T; + typedef typename get_pod_type::result T; + + const uword N = (std::min)(A.n_rows, A.n_cols); - if(strip_trimat::do_trimat) + T abs_min = Datum::inf; + T abs_max = T(0); + + for(uword i=0; i < N; ++i) { - const strip_trimat S(X.get_ref()); - - const quasi_unwrap::stored_type> U(S.M); - - arma_debug_check( (U.M.is_square() == false), "rcond(): matrix must be square sized" ); + const T abs_val = std::abs(A.at(i,i)); - const uword layout = (S.do_triu) ? uword(0) : uword(1); + if(arma_isnan(abs_val)) + { + arma_debug_warn_level(3, "cond(): failed"); + + return Datum::nan; + } - return auxlib::rcond_trimat(U.M, layout); + abs_min = (abs_val < abs_min) ? abs_val : abs_min; + abs_max = (abs_val > abs_max) ? abs_val : abs_max; } - Mat A = X.get_ref(); + if((abs_min == T(0)) || (abs_max == T(0))) { return Datum::inf; } - arma_debug_check( (A.is_square() == false), "rcond(): matrix must be square sized" ); + return T(abs_max / abs_min); + } + + + +template +inline +typename get_pod_type::result +op_cond::apply_sym(Mat& A) + { + arma_extra_debug_sigprint(); - if(A.is_empty()) { return Datum::inf; } + typedef typename get_pod_type::result T; - if(is_op_diagmat::value || A.is_diagmat()) + Col eigval; + + const bool status = auxlib::eig_sym(eigval, A); + + if(status == false) { - arma_extra_debug_print("op_cond::rcond(): detected diagonal matrix"); + arma_debug_warn_level(3, "cond(): failed"); - const eT* colmem = A.memptr(); - const uword N = A.n_rows; - - T max_abs_src_val = T(0); - T max_abs_inv_val = T(0); - - for(uword i=0; i max_abs_src_val) ? abs_src_val : max_abs_src_val; - max_abs_inv_val = (abs_inv_val > max_abs_inv_val) ? abs_inv_val : max_abs_inv_val; - - colmem += N; - } - - return T(1) / (max_abs_src_val * max_abs_inv_val); + return Datum::nan; } - const bool is_triu = trimat_helper::is_triu(A); - const bool is_tril = (is_triu) ? false : trimat_helper::is_tril(A); + if(eigval.n_elem == 0) { return T(0); } + + const T* eigval_mem = eigval.memptr(); - if(is_triu || is_tril) + T abs_min = std::abs(eigval_mem[0]); + T abs_max = abs_min; + + for(uword i=1; i < eigval.n_elem; ++i) { - const uword layout = (is_triu) ? uword(0) : uword(1); + const T abs_val = std::abs(eigval_mem[i]); - return auxlib::rcond_trimat(A, layout); + abs_min = (abs_val < abs_min) ? abs_val : abs_min; + abs_max = (abs_val > abs_max) ? abs_val : abs_max; } - const bool try_sympd = arma_config::optimise_sympd && (auxlib::crippled_lapack(A) ? false : sympd_helper::guess_sympd(A)); + if((abs_min == T(0)) || (abs_max == T(0))) { return Datum::inf; } + + return T(abs_max / abs_min); + } + + + +template +inline +typename get_pod_type::result +op_cond::apply_gen(Mat& A) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type::result T; - if(try_sympd) + Col S; + + const bool status = auxlib::svd_dc(S, A); + + if(status == false) { - arma_extra_debug_print("op_cond::rcond(): attempting sympd optimisation"); + arma_debug_warn_level(3, "cond(): failed"); - bool calc_ok = false; - - const T out_val = auxlib::rcond_sympd(A, calc_ok); - - if(calc_ok) { return out_val; } - - arma_extra_debug_print("op_cond::rcond(): sympd optimisation failed"); - - // auxlib::rcond_sympd() may have failed because A isn't really sympd - // restore A, as auxlib::rcond_sympd() may have destroyed it - A = X.get_ref(); - // fallthrough to the next return statement + return Datum::nan; } - return auxlib::rcond(A); + if(S.n_elem == 0) { return T(0); } + + const T S_max = S[0]; + const T S_min = S[S.n_elem-1]; + + if((S_max == T(0)) || (S_min == T(0))) { return Datum::inf; } + + return T(S_max / S_min); } diff --git a/Include/armadillo/armadillo_bits/op_expmat_meat.hpp b/Include/armadillo/armadillo_bits/op_expmat_meat.hpp index 57d8874d9..b6e4b65be 100644 --- a/Include/armadillo/armadillo_bits/op_expmat_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_expmat_meat.hpp @@ -60,7 +60,7 @@ op_expmat::apply_direct(Mat& out, const Base + inline static void apply(Mat& out, const mtOp& X); + }; + + + //! @} diff --git a/Include/armadillo/armadillo_bits/op_find_meat.hpp b/Include/armadillo/armadillo_bits/op_find_meat.hpp index facbf1ce9..42ac67c5e 100644 --- a/Include/armadillo/armadillo_bits/op_find_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_find_meat.hpp @@ -605,4 +605,50 @@ op_find_nonfinite::apply(Mat& out, const mtOp +inline +void +op_find_nan::apply(Mat& out, const mtOp& X) + { + arma_extra_debug_sigprint(); + + const Proxy P(X.m); + + const uword n_elem = P.get_n_elem(); + + Mat indices(n_elem, 1, arma_nozeros_indicator()); + + uword* indices_mem = indices.memptr(); + uword count = 0; + + if(Proxy::use_at == false) + { + const typename Proxy::ea_type Pea = P.get_ea(); + + for(uword i=0; i +struct op_inv_gen_state + { + T rcond = T(0); + bool is_diag = false; + bool is_sym = false; + }; + + + class op_inv_gen_rcond : public traits_op_default { public: template - inline static bool apply_direct(Mat& out_inv, typename T1::pod_type& out_rcond, const Base& expr); + inline static bool apply_direct(Mat& out_inv, op_inv_gen_state& out_state, const Base& expr); }; @@ -110,18 +120,21 @@ namespace inv_opts static constexpr uword flag_allow_approx = uword(1u << 1); static constexpr uword flag_likely_sympd = uword(1u << 2); static constexpr uword flag_no_sympd = uword(1u << 3); + static constexpr uword flag_no_ugly = uword(1u << 4); struct opts_none : public opts { inline opts_none() : opts(flag_none ) {} }; struct opts_tiny : public opts { inline opts_tiny() : opts(flag_tiny ) {} }; struct opts_allow_approx : public opts { inline opts_allow_approx() : opts(flag_allow_approx) {} }; struct opts_likely_sympd : public opts { inline opts_likely_sympd() : opts(flag_likely_sympd) {} }; struct opts_no_sympd : public opts { inline opts_no_sympd() : opts(flag_no_sympd ) {} }; + struct opts_no_ugly : public opts { inline opts_no_ugly() : opts(flag_no_ugly ) {} }; static const opts_none none; static const opts_tiny tiny; static const opts_allow_approx allow_approx; static const opts_likely_sympd likely_sympd; static const opts_no_sympd no_sympd; + static const opts_no_ugly no_ugly; } diff --git a/Include/armadillo/armadillo_bits/op_inv_gen_meat.hpp b/Include/armadillo/armadillo_bits/op_inv_gen_meat.hpp index 914a7e7c8..dfb903987 100644 --- a/Include/armadillo/armadillo_bits/op_inv_gen_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_inv_gen_meat.hpp @@ -62,7 +62,7 @@ op_inv_gen_full::apply(Mat& out, const Op& out, const Base inv_state; + + const bool status = op_inv_gen_rcond::apply_direct(out, inv_state, expr); + + const T local_rcond = inv_state.rcond; // workaround for bug in gcc 4.8 + + if((status == false) || (local_rcond < std::numeric_limits::epsilon()) || arma_isnan(local_rcond)) { return false; } + + return true; + } if(allow_approx) { - T rcond = T(0); + op_inv_gen_state inv_state; Mat tmp; - const bool status = op_inv_gen_rcond::apply_direct(tmp, rcond, expr); + const bool status = op_inv_gen_rcond::apply_direct(tmp, inv_state, expr); - if((status == false) || (rcond < auxlib::epsilon_lapack(tmp))) + const T local_rcond = inv_state.rcond; // workaround for bug in gcc 4.8 + + if((status == false) || (local_rcond < std::numeric_limits::epsilon()) || arma_isnan(local_rcond)) { Mat A = expr.get_ref(); + if(inv_state.is_diag) { return op_pinv::apply_diag(out, A, T(0) ); } + if(inv_state.is_sym ) { return op_pinv::apply_sym (out, A, T(0), uword(0)); } + return op_pinv::apply_gen(out, A, T(0), uword(0)); } @@ -124,7 +148,7 @@ op_inv_gen_full::apply_direct(Mat& out, const Base& X) template inline bool -op_inv_gen_rcond::apply_direct(Mat& out, typename T1::pod_type& out_rcond, const Base& expr) +op_inv_gen_rcond::apply_direct(Mat& out, op_inv_gen_state& out_state, const Base& expr) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; typedef typename T1::pod_type T; - out = expr.get_ref(); - out_rcond = T(0); + out = expr.get_ref(); + out_state.rcond = T(0); - arma_debug_check( (out.is_square() == false), "inv(): given matrix must be square sized" ); - - const uword N = out.n_rows; + arma_debug_check( (out.is_square() == false), "inv(): given matrix must be square sized", [&](){ out.soft_reset(); } ); if(is_op_diagmat::value || out.is_diagmat()) { arma_extra_debug_print("op_inv_gen_rcond: detected diagonal matrix"); + out_state.is_diag = true; + eT* colmem = out.memptr(); T max_abs_src_val = T(0); T max_abs_inv_val = T(0); + const uword N = out.n_rows; + for(uword i=0; i& out, typename T1::po colmem += N; } - out_rcond = T(1) / (max_abs_src_val * max_abs_inv_val); + out_state.rcond = T(1) / (max_abs_src_val * max_abs_inv_val); return true; } @@ -433,7 +459,7 @@ op_inv_gen_rcond::apply_direct(Mat& out, typename T1::po if(is_triu_expr || is_tril_expr || is_triu_mat || is_tril_mat) { - return auxlib::inv_tr_rcond(out, out_rcond, ((is_triu_expr || is_triu_mat) ? uword(0) : uword(1))); + return auxlib::inv_tr_rcond(out, out_state.rcond, ((is_triu_expr || is_triu_mat) ? uword(0) : uword(1))); } const bool try_sympd = arma_config::optimise_sympd && ((auxlib::crippled_lapack(out)) ? false : sympd_helper::guess_sympd(out)); @@ -442,11 +468,13 @@ op_inv_gen_rcond::apply_direct(Mat& out, typename T1::po { arma_extra_debug_print("op_inv_gen_rcond: attempting sympd optimisation"); + out_state.is_sym = true; + Mat tmp = out; bool sympd_state = false; - const bool status = auxlib::inv_sympd_rcond(tmp, sympd_state, out_rcond, T(-1)); + const bool status = auxlib::inv_sympd_rcond(tmp, sympd_state, out_state.rcond, T(-1)); if(status) { out.steal_mem(tmp); return true; } @@ -457,7 +485,7 @@ op_inv_gen_rcond::apply_direct(Mat& out, typename T1::po // fallthrough if optimisation failed } - return auxlib::inv_rcond(out, out_rcond); + return auxlib::inv_rcond(out, out_state.rcond); } diff --git a/Include/armadillo/armadillo_bits/op_inv_spd_bones.hpp b/Include/armadillo/armadillo_bits/op_inv_spd_bones.hpp index 3e17a9c34..21ea44e32 100644 --- a/Include/armadillo/armadillo_bits/op_inv_spd_bones.hpp +++ b/Include/armadillo/armadillo_bits/op_inv_spd_bones.hpp @@ -58,13 +58,22 @@ class op_inv_spd_full +template +struct op_inv_spd_state + { + T rcond = T(0); + bool is_diag = false; + }; + + + class op_inv_spd_rcond : public traits_op_default { public: template - inline static bool apply_direct(Mat& out_inv, typename T1::pod_type& out_rcond, const Base& expr); + inline static bool apply_direct(Mat& out_inv, op_inv_spd_state& out_state, const Base& expr); }; diff --git a/Include/armadillo/armadillo_bits/op_inv_spd_meat.hpp b/Include/armadillo/armadillo_bits/op_inv_spd_meat.hpp index e4170dc33..279f042ec 100644 --- a/Include/armadillo/armadillo_bits/op_inv_spd_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_inv_spd_meat.hpp @@ -62,7 +62,7 @@ op_inv_spd_full::apply(Mat& out, const Op& out, const Base inv_state; + + const bool status = op_inv_spd_rcond::apply_direct(out, inv_state, expr); + + const T local_rcond = inv_state.rcond; // workaround for bug in gcc 4.8 + + if((status == false) || (local_rcond < std::numeric_limits::epsilon()) || arma_isnan(local_rcond)) { return false; } + + return true; + } if(allow_approx) { - T rcond = T(0); + op_inv_spd_state inv_state; Mat tmp; - const bool status = op_inv_spd_rcond::apply_direct(tmp, rcond, expr); + const bool status = op_inv_spd_rcond::apply_direct(tmp, inv_state, expr); + + const T local_rcond = inv_state.rcond; // workaround for bug in gcc 4.8 - if((status == false) || (rcond < auxlib::epsilon_lapack(tmp))) + if((status == false) || (local_rcond < std::numeric_limits::epsilon()) || arma_isnan(local_rcond)) { const Mat A = expr.get_ref(); + if(inv_state.is_diag) { return op_pinv::apply_diag(out, A, T(0)); } + return op_pinv::apply_sym(out, A, T(0), uword(0)); } @@ -125,7 +149,7 @@ op_inv_spd_full::apply_direct(Mat& out, const Base 0)) { @@ -325,17 +349,17 @@ op_inv_spd_full::apply_tiny_4x4(Mat& X) template inline bool -op_inv_spd_rcond::apply_direct(Mat& out, typename T1::pod_type& out_rcond, const Base& expr) +op_inv_spd_rcond::apply_direct(Mat& out, op_inv_spd_state& out_state, const Base& expr) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT; typedef typename T1::pod_type T; - out = expr.get_ref(); - out_rcond = T(0); + out = expr.get_ref(); + out_state.rcond = T(0); - arma_debug_check( (out.is_square() == false), "inv_sympd(): given matrix must be square sized" ); + arma_debug_check( (out.is_square() == false), "inv_sympd(): given matrix must be square sized", [&](){ out.soft_reset(); } ); if((arma_config::debug) && (arma_config::warn_level > 0)) { @@ -355,6 +379,8 @@ op_inv_spd_rcond::apply_direct(Mat& out, typename T1::po { arma_extra_debug_print("op_inv_spd_rcond: detected diagonal matrix"); + out_state.is_diag = true; + eT* colmem = out.memptr(); T max_abs_src_val = T(0); @@ -382,7 +408,7 @@ op_inv_spd_rcond::apply_direct(Mat& out, typename T1::po colmem += N; } - out_rcond = T(1) / (max_abs_src_val * max_abs_inv_val); + out_state.rcond = T(1) / (max_abs_src_val * max_abs_inv_val); return true; } @@ -397,18 +423,18 @@ op_inv_spd_rcond::apply_direct(Mat& out, typename T1::po auxlib::inv_sympd(out, sympd_state); - if(sympd_state == false) { out.soft_reset(); out_rcond = T(0); return false; } + if(sympd_state == false) { out.soft_reset(); out_state.rcond = T(0); return false; } - out_rcond = auxlib::rcond(tmp); + out_state.rcond = auxlib::rcond(tmp); - if(out_rcond == T(0)) { out.soft_reset(); return false; } + if(out_state.rcond == T(0)) { out.soft_reset(); return false; } return true; } bool is_sympd_junk = false; - return auxlib::inv_sympd_rcond(out, is_sympd_junk, out_rcond, T(-1)); + return auxlib::inv_sympd_rcond(out, is_sympd_junk, out_state.rcond, T(-1)); } diff --git a/Include/armadillo/armadillo_bits/op_nonzeros_meat.hpp b/Include/armadillo/armadillo_bits/op_nonzeros_meat.hpp index 7d197a1e4..8cf32fa19 100644 --- a/Include/armadillo/armadillo_bits/op_nonzeros_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_nonzeros_meat.hpp @@ -113,23 +113,37 @@ op_nonzeros_spmat::apply(Mat& out, const SpToDOp 0) + if(N == 0) { return; } + + if(is_SpMat::stored_type>::value) { - if(is_SpMat::stored_type>::value) - { - const unwrap_spmat::stored_type> U(P.Q); - - arrayops::copy(out.memptr(), U.M.values, N); - } - else + const unwrap_spmat::stored_type> U(P.Q); + + arrayops::copy(out.memptr(), U.M.values, N); + + return; + } + + if(is_SpSubview::stored_type>::value) + { + const SpSubview& sv = reinterpret_cast< const SpSubview& >(P.Q); + + if(sv.n_rows == sv.m.n_rows) { - eT* out_mem = out.memptr(); + const SpMat& m = sv.m; + const uword col = sv.aux_col1; - typename SpProxy::const_iterator_type it = P.begin(); + arrayops::copy(out.memptr(), &(m.values[ m.col_ptrs[col] ]), N); - for(uword i=0; i::const_iterator_type it = P.begin(); + + for(uword i=0; i inline static void apply(Mat& out, const Op& in); + + template inline static bool apply_direct(Mat& out, const Base& expr); + }; + + + class op_pinv : public traits_op_default { diff --git a/Include/armadillo/armadillo_bits/op_pinv_meat.hpp b/Include/armadillo/armadillo_bits/op_pinv_meat.hpp index 7ea8e3d66..283c369ff 100644 --- a/Include/armadillo/armadillo_bits/op_pinv_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_pinv_meat.hpp @@ -22,6 +22,45 @@ +template +inline +void +op_pinv_default::apply(Mat& out, const Op& in) + { + arma_extra_debug_sigprint(); + + const bool status = op_pinv_default::apply_direct(out, in.m); + + if(status == false) + { + out.soft_reset(); + arma_stop_runtime_error("pinv(): svd failed"); + } + } + + + +template +inline +bool +op_pinv_default::apply_direct(Mat& out, const Base& expr) + { + arma_extra_debug_sigprint(); + + typedef typename T1::pod_type T; + + constexpr T tol = T(0); + constexpr uword method_id = uword(0); + + return op_pinv::apply_direct(out, expr, tol, method_id); + } + + + +// + + + template inline void diff --git a/Include/armadillo/armadillo_bits/op_rcond_bones.hpp b/Include/armadillo/armadillo_bits/op_rcond_bones.hpp new file mode 100644 index 000000000..88697e8dc --- /dev/null +++ b/Include/armadillo/armadillo_bits/op_rcond_bones.hpp @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + +//! \addtogroup op_rcond +//! @{ + + +class op_rcond + : public traits_op_default + { + public: + + template static inline typename T1::pod_type apply(const Base& X); + }; + + +//! @} diff --git a/Include/armadillo/armadillo_bits/op_rcond_meat.hpp b/Include/armadillo/armadillo_bits/op_rcond_meat.hpp new file mode 100644 index 000000000..d0947c544 --- /dev/null +++ b/Include/armadillo/armadillo_bits/op_rcond_meat.hpp @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au) +// Copyright 2008-2016 National ICT Australia (NICTA) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ------------------------------------------------------------------------ + + +//! \addtogroup op_rcond +//! @{ + + + +template +inline +typename T1::pod_type +op_rcond::apply(const Base& X) + { + arma_extra_debug_sigprint(); + + typedef typename T1::elem_type eT; + typedef typename T1::pod_type T; + + if(strip_trimat::do_trimat) + { + const strip_trimat S(X.get_ref()); + + const quasi_unwrap::stored_type> U(S.M); + + arma_debug_check( (U.M.is_square() == false), "rcond(): matrix must be square sized" ); + + const uword layout = (S.do_triu) ? uword(0) : uword(1); + + return auxlib::rcond_trimat(U.M, layout); + } + + Mat A = X.get_ref(); + + arma_debug_check( (A.is_square() == false), "rcond(): matrix must be square sized" ); + + if(A.is_empty()) { return Datum::inf; } + + if(is_op_diagmat::value || A.is_diagmat()) + { + arma_extra_debug_print("op_rcond::apply(): detected diagonal matrix"); + + const eT* colmem = A.memptr(); + const uword N = A.n_rows; + + T abs_min = Datum::inf; + T abs_max = T(0); + + for(uword i=0; i abs_max) ? abs_val : abs_max; + + colmem += N; + } + + if((abs_min == T(0)) || (abs_max == T(0))) { return T(0); } + + return T(abs_min / abs_max); + } + + const bool is_triu = trimat_helper::is_triu(A); + const bool is_tril = (is_triu) ? false : trimat_helper::is_tril(A); + + if(is_triu || is_tril) + { + const uword layout = (is_triu) ? uword(0) : uword(1); + + return auxlib::rcond_trimat(A, layout); + } + + const bool try_sympd = arma_config::optimise_sympd && (auxlib::crippled_lapack(A) ? false : sympd_helper::guess_sympd(A)); + + if(try_sympd) + { + arma_extra_debug_print("op_rcond::apply(): attempting sympd optimisation"); + + bool calc_ok = false; + + const T out_val = auxlib::rcond_sympd(A, calc_ok); + + if(calc_ok) { return out_val; } + + arma_extra_debug_print("op_rcond::apply(): sympd optimisation failed"); + + // auxlib::rcond_sympd() may have failed because A isn't really sympd + // restore A, as auxlib::rcond_sympd() may have destroyed it + A = X.get_ref(); + // fallthrough to the next return statement + } + + return auxlib::rcond(A); + } + + + +//! @} diff --git a/Include/armadillo/armadillo_bits/op_sum_meat.hpp b/Include/armadillo/armadillo_bits/op_sum_meat.hpp index f3ca32aff..b6b57b534 100644 --- a/Include/armadillo/armadillo_bits/op_sum_meat.hpp +++ b/Include/armadillo/armadillo_bits/op_sum_meat.hpp @@ -91,26 +91,36 @@ op_sum::apply_noalias_unwrap(Mat& out, const Proxy& const uword X_n_rows = X.n_rows; const uword X_n_cols = X.n_cols; + const uword out_n_rows = (dim == 0) ? uword(1) : X_n_rows; + const uword out_n_cols = (dim == 0) ? X_n_cols : uword(1); + + out.set_size(out_n_rows, out_n_cols); + + if(X.n_elem == 0) { out.zeros(); return; } + + const eT* X_colptr = X.memptr(); + eT* out_mem = out.memptr(); + if(dim == 0) { - out.set_size(1, X_n_cols); - - eT* out_mem = out.memptr(); - for(uword col=0; col < X_n_cols; ++col) { - out_mem[col] = arrayops::accumulate( X.colptr(col), X_n_rows ); + out_mem[col] = arrayops::accumulate( X_colptr, X_n_rows ); + + X_colptr += X_n_rows; } } else { - out.zeros(X_n_rows, 1); + arrayops::copy(out_mem, X_colptr, X_n_rows); - eT* out_mem = out.memptr(); + X_colptr += X_n_rows; - for(uword col=0; col < X_n_cols; ++col) + for(uword col=1; col < X_n_cols; ++col) { - arrayops::inplace_plus( out_mem, X.colptr(col), X_n_rows ); + arrayops::inplace_plus( out_mem, X_colptr, X_n_rows ); + + X_colptr += X_n_rows; } } } @@ -130,42 +140,93 @@ op_sum::apply_noalias_proxy(Mat& out, const Proxy& P const uword P_n_rows = P.get_n_rows(); const uword P_n_cols = P.get_n_cols(); - if(dim == 0) + const uword out_n_rows = (dim == 0) ? uword(1) : P_n_rows; + const uword out_n_cols = (dim == 0) ? P_n_cols : uword(1); + + out.set_size(out_n_rows, out_n_cols); + + if(P.get_n_elem() == 0) { out.zeros(); return; } + + eT* out_mem = out.memptr(); + + if(Proxy::use_at == false) { - out.set_size(1, P_n_cols); - - eT* out_mem = out.memptr(); - - for(uword col=0; col < P_n_cols; ++col) + if(dim == 0) { - eT val1 = eT(0); - eT val2 = eT(0); + uword count = 0; - uword i,j; - for(i=0, j=1; j < P_n_rows; i+=2, j+=2) + for(uword col=0; col < P_n_cols; ++col) { - val1 += P.at(i,col); - val2 += P.at(j,col); + eT val1 = eT(0); + eT val2 = eT(0); + + uword j; + for(j=1; j < P_n_rows; j+=2) + { + val1 += P[count]; ++count; + val2 += P[count]; ++count; + } + + if((j-1) < P_n_rows) + { + val1 += P[count]; ++count; + } + + out_mem[col] = (val1 + val2); } + } + else + { + uword count = 0; - if(i < P_n_rows) + for(uword row=0; row < P_n_rows; ++row) { - val1 += P.at(i,col); + out_mem[row] = P[count]; ++count; } - out_mem[col] = (val1 + val2); + for(uword col=1; col < P_n_cols; ++col) + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] += P[count]; ++count; + } } } else { - out.zeros(P_n_rows, 1); - - eT* out_mem = out.memptr(); - - for(uword col=0; col < P_n_cols; ++col) - for(uword row=0; row < P_n_rows; ++row) + if(dim == 0) + { + for(uword col=0; col < P_n_cols; ++col) + { + eT val1 = eT(0); + eT val2 = eT(0); + + uword i,j; + for(i=0, j=1; j < P_n_rows; i+=2, j+=2) + { + val1 += P.at(i,col); + val2 += P.at(j,col); + } + + if(i < P_n_rows) + { + val1 += P.at(i,col); + } + + out_mem[col] = (val1 + val2); + } + } + else { - out_mem[row] += P.at(row,col); + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] = P.at(row,0); + } + + for(uword col=1; col < P_n_cols; ++col) + for(uword row=0; row < P_n_rows; ++row) + { + out_mem[row] += P.at(row,col); + } } } } diff --git a/Include/armadillo/armadillo_bits/sp_auxlib_meat.hpp b/Include/armadillo/armadillo_bits/sp_auxlib_meat.hpp index 4ee8efaf9..2de68d594 100644 --- a/Include/armadillo/armadillo_bits/sp_auxlib_meat.hpp +++ b/Include/armadillo/armadillo_bits/sp_auxlib_meat.hpp @@ -1184,21 +1184,14 @@ sp_auxlib::spsolve_simple(Mat& X, const SpBase A.n_cols) + if(A.is_square() == false) { - arma_stop_logic_error("spsolve(): solving over-determined systems currently not supported"); - X.soft_reset(); - return false; - } - else - if(A.n_rows < A.n_cols) - { - arma_stop_logic_error("spsolve(): solving under-determined systems currently not supported"); X.soft_reset(); + arma_stop_logic_error("spsolve(): solving under-determined / over-determined systems is currently not supported"); return false; } - arma_debug_check( (A.n_rows != X.n_rows), "spsolve(): number of rows in the given objects must be the same" ); + arma_debug_check( (A.n_rows != X.n_rows), "spsolve(): number of rows in the given objects must be the same", [&](){ X.soft_reset(); } ); if(A.is_empty() || X.is_empty()) { @@ -1318,21 +1311,14 @@ sp_auxlib::spsolve_refine(Mat& X, typename T1::pod_type& const Mat& B = (B_is_modified) ? B_copy : B_unwrap; - if(A.n_rows > A.n_cols) + if(A.is_square() == false) { - arma_stop_logic_error("spsolve(): solving over-determined systems currently not supported"); - X.soft_reset(); - return false; - } - else - if(A.n_rows < A.n_cols) - { - arma_stop_logic_error("spsolve(): solving under-determined systems currently not supported"); X.soft_reset(); + arma_stop_logic_error("spsolve(): solving under-determined / over-determined systems is currently not supported"); return false; } - arma_debug_check( (A.n_rows != B.n_rows), "spsolve(): number of rows in the given objects must be the same" ); + arma_debug_check( (A.n_rows != B.n_rows), "spsolve(): number of rows in the given objects must be the same", [&](){ X.soft_reset(); } ); X.zeros(A.n_cols, B.n_cols); // set the elements to zero, as we don't trust the SuperLU spaghetti code @@ -1390,10 +1376,10 @@ sp_auxlib::spsolve_refine(Mat& X, typename T1::pod_type& superlu_array_wrangler berr(B.n_cols+1); superlu::GlobalLU_t glu; - arrayops::inplace_set(reinterpret_cast(&glu), char(0), sizeof(superlu::GlobalLU_t)); + arrayops::fill_zeros(reinterpret_cast(&glu), sizeof(superlu::GlobalLU_t)); superlu::mem_usage_t mu; - arrayops::inplace_set(reinterpret_cast(&mu), char(0), sizeof(superlu::mem_usage_t)); + arrayops::fill_zeros(reinterpret_cast(&mu), sizeof(superlu::mem_usage_t)); superlu_stat_wrangler stat; @@ -1430,7 +1416,7 @@ sp_auxlib::spsolve_refine(Mat& X, typename T1::pod_type& else if(info > int(A.n_cols+1)) { - arma_debug_warn_level(1, "spsolve(): memory allocation failure: could not allocate ", (info - int(A.n_cols)), " bytes"); + arma_debug_warn_level(1, "spsolve(): memory allocation failure"); } else if(info < 0) diff --git a/Include/armadillo/armadillo_bits/spop_norm_bones.hpp b/Include/armadillo/armadillo_bits/spop_norm_bones.hpp index d9464e04f..1d9445182 100644 --- a/Include/armadillo/armadillo_bits/spop_norm_bones.hpp +++ b/Include/armadillo/armadillo_bits/spop_norm_bones.hpp @@ -31,6 +31,8 @@ class spop_norm template inline static typename get_pod_type::result mat_norm_2(const SpMat& X, const typename arma_cx_only::result* junk = nullptr); template inline static typename get_pod_type::result mat_norm_inf(const SpMat& X); + + template inline static typename get_pod_type::result vec_norm_k(const eT* mem, const uword N, const uword k); }; diff --git a/Include/armadillo/armadillo_bits/spop_norm_meat.hpp b/Include/armadillo/armadillo_bits/spop_norm_meat.hpp index f4b93942e..6746319f0 100644 --- a/Include/armadillo/armadillo_bits/spop_norm_meat.hpp +++ b/Include/armadillo/armadillo_bits/spop_norm_meat.hpp @@ -104,4 +104,26 @@ spop_norm::mat_norm_inf(const SpMat& X) +template +inline +typename get_pod_type::result +spop_norm::vec_norm_k(const eT* mem, const uword N, const uword k) + { + arma_extra_debug_sigprint(); + + arma_debug_check( (k == 0), "norm(): k must be greater than zero" ); + + // create a fake dense vector to allow reuse of code for dense vectors + Col fake_vector( access::rwp(mem), N, false ); + + const Proxy< Col > P_fake_vector(fake_vector); + + if(k == uword(1)) { return op_norm::vec_norm_1(P_fake_vector); } + if(k == uword(2)) { return op_norm::vec_norm_2(P_fake_vector); } + + return op_norm::vec_norm_k(P_fake_vector, int(k)); + } + + + //! @} diff --git a/Include/armadillo/armadillo_bits/subview_cube_each_bones.hpp b/Include/armadillo/armadillo_bits/subview_cube_each_bones.hpp index 0862aae06..29d81fd24 100644 --- a/Include/armadillo/armadillo_bits/subview_cube_each_bones.hpp +++ b/Include/armadillo/armadillo_bits/subview_cube_each_bones.hpp @@ -28,7 +28,8 @@ class subview_cube_each_common const Cube& P; - inline void check_size(const Mat& A) const; + template + inline void check_size(const Mat& A) const; protected: @@ -36,7 +37,8 @@ class subview_cube_each_common arma_inline subview_cube_each_common(const Cube& in_p); inline subview_cube_each_common() = delete; - arma_cold inline const std::string incompat_size_string(const Mat& A) const; + template + arma_cold inline const std::string incompat_size_string(const Mat& A) const; }; @@ -104,7 +106,7 @@ class subview_cube_each1_aux template static inline Cube operator_plus(const subview_cube_each1& X, const Base& Y); - + template static inline Cube operator_minus(const subview_cube_each1& X, const Base& Y); @@ -135,7 +137,7 @@ class subview_cube_each2_aux template static inline Cube operator_plus(const subview_cube_each2& X, const Base& Y); - + template static inline Cube operator_minus(const subview_cube_each2& X, const Base& Y); diff --git a/Include/armadillo/armadillo_bits/subview_cube_each_meat.hpp b/Include/armadillo/armadillo_bits/subview_cube_each_meat.hpp index 7af493b55..a518444fe 100644 --- a/Include/armadillo/armadillo_bits/subview_cube_each_meat.hpp +++ b/Include/armadillo/armadillo_bits/subview_cube_each_meat.hpp @@ -35,9 +35,10 @@ subview_cube_each_common::subview_cube_each_common(const Cube& in_p) template +template inline void -subview_cube_each_common::check_size(const Mat& A) const +subview_cube_each_common::check_size(const Mat& A) const { if(arma_config::debug) { @@ -51,10 +52,11 @@ subview_cube_each_common::check_size(const Mat& A) const template +template arma_cold inline const std::string -subview_cube_each_common::incompat_size_string(const Mat& A) const +subview_cube_each_common::incompat_size_string(const Mat& A) const { std::ostringstream tmp; @@ -134,7 +136,7 @@ subview_cube_each1::operator+= (const Base& in) const uword p_n_elem_slice = p.n_elem_slice; const eT* A_mem = A.memptr(); - + for(uword i=0; i < p_n_slices; ++i) { arrayops::inplace_plus( p.slice_memptr(i), A_mem, p_n_elem_slice ); } } @@ -287,7 +289,7 @@ subview_cube_each2::operator= (const Base& in) const uword p_n_slices = p.n_slices; const uword p_n_elem_slice = p.n_elem_slice; - + const uword* indices_mem = U.M.memptr(); const uword N = U.M.n_elem; @@ -326,7 +328,7 @@ subview_cube_each2::operator+= (const Base& in) const uword p_n_slices = p.n_slices; const uword p_n_elem_slice = p.n_elem_slice; - + const uword* indices_mem = U.M.memptr(); const uword N = U.M.n_elem; @@ -365,7 +367,7 @@ subview_cube_each2::operator-= (const Base& in) const uword p_n_slices = p.n_slices; const uword p_n_elem_slice = p.n_elem_slice; - + const uword* indices_mem = U.M.memptr(); const uword N = U.M.n_elem; @@ -404,7 +406,7 @@ subview_cube_each2::operator%= (const Base& in) const uword p_n_slices = p.n_slices; const uword p_n_elem_slice = p.n_elem_slice; - + const uword* indices_mem = U.M.memptr(); const uword N = U.M.n_elem; @@ -443,7 +445,7 @@ subview_cube_each2::operator/= (const Base& in) const uword p_n_slices = p.n_slices; const uword p_n_elem_slice = p.n_elem_slice; - + const uword* indices_mem = U.M.memptr(); const uword N = U.M.n_elem; diff --git a/Include/armadillo/armadillo_bits/subview_each_bones.hpp b/Include/armadillo/armadillo_bits/subview_each_bones.hpp index 09d14cff5..dcb58cd83 100644 --- a/Include/armadillo/armadillo_bits/subview_each_bones.hpp +++ b/Include/armadillo/armadillo_bits/subview_each_bones.hpp @@ -30,7 +30,8 @@ class subview_each_common const parent& P; - inline void check_size(const Mat& A) const; + template + inline void check_size(const Mat& A) const; protected: @@ -43,7 +44,8 @@ class subview_each_common arma_inline const Mat& get_mat_ref() const; - arma_cold inline const std::string incompat_size_string(const Mat& A) const; + template + arma_cold inline const std::string incompat_size_string(const Mat& A) const; }; @@ -117,7 +119,7 @@ class subview_each1_aux template static inline Mat operator_plus(const subview_each1& X, const Base& Y); - + template static inline Mat operator_minus(const subview_each1& X, const Base& Y); @@ -142,7 +144,7 @@ class subview_each2_aux template static inline Mat operator_plus(const subview_each2& X, const Base& Y); - + template static inline Mat operator_minus(const subview_each2& X, const Base& Y); diff --git a/Include/armadillo/armadillo_bits/subview_each_meat.hpp b/Include/armadillo/armadillo_bits/subview_each_meat.hpp index 2acf52e52..e48064f4d 100644 --- a/Include/armadillo/armadillo_bits/subview_each_meat.hpp +++ b/Include/armadillo/armadillo_bits/subview_each_meat.hpp @@ -65,11 +65,12 @@ subview_each_common::get_mat_ref() const template +template inline void -subview_each_common::check_size(const Mat& A) const +subview_each_common::check_size(const Mat& A) const { - if(arma_config::debug == true) + if(arma_config::debug) { if(mode == 0) { @@ -91,10 +92,11 @@ subview_each_common::check_size(const Mat +template arma_cold inline const std::string -subview_each_common::incompat_size_string(const Mat& A) const +subview_each_common::incompat_size_string(const Mat& A) const { std::ostringstream tmp; @@ -192,7 +194,7 @@ subview_each1::operator+= (const Base& in) const eT* A_mem = A.memptr(); const uword p_n_rows = p.n_rows; const uword p_n_cols = p.n_cols; - + if(mode == 0) // each column { for(uword i=0; i < p_n_cols; ++i) @@ -229,7 +231,7 @@ subview_each1::operator-= (const Base& in) const eT* A_mem = A.memptr(); const uword p_n_rows = p.n_rows; const uword p_n_cols = p.n_cols; - + if(mode == 0) // each column { for(uword i=0; i < p_n_cols; ++i) diff --git a/Include/armadillo/armadillo_bits/subview_elem1_meat.hpp b/Include/armadillo/armadillo_bits/subview_elem1_meat.hpp index 955956dcf..d1b67128e 100644 --- a/Include/armadillo/armadillo_bits/subview_elem1_meat.hpp +++ b/Include/armadillo/armadillo_bits/subview_elem1_meat.hpp @@ -806,7 +806,7 @@ subview_elem1::extract(Mat& actual_out, const subview_elem1& i if(alias) { arma_extra_debug_print("subview_elem1::extract(): aliasing detected"); } - Mat* tmp_out = alias ? new Mat() : 0; + Mat* tmp_out = alias ? new Mat() : nullptr; Mat& out = alias ? *tmp_out : actual_out; out.set_size(aa_n_elem, 1); diff --git a/Include/armadillo/armadillo_bits/subview_elem2_meat.hpp b/Include/armadillo/armadillo_bits/subview_elem2_meat.hpp index 3245c3587..69d5f5d72 100644 --- a/Include/armadillo/armadillo_bits/subview_elem2_meat.hpp +++ b/Include/armadillo/armadillo_bits/subview_elem2_meat.hpp @@ -699,7 +699,7 @@ subview_elem2::extract(Mat& actual_out, const subview_elem2* tmp_out = alias ? new Mat() : 0; + Mat* tmp_out = alias ? new Mat() : nullptr; Mat& out = alias ? *tmp_out : actual_out; if( (in.all_rows == false) && (in.all_cols == false) ) diff --git a/Include/armadillo/armadillo_bits/subview_field_meat.hpp b/Include/armadillo/armadillo_bits/subview_field_meat.hpp index 1c8833cb6..9b2f0c3a7 100644 --- a/Include/armadillo/armadillo_bits/subview_field_meat.hpp +++ b/Include/armadillo/armadillo_bits/subview_field_meat.hpp @@ -514,7 +514,7 @@ subview_field::extract(field& actual_out, const subview_field& in) // const bool alias = (&actual_out == &in.f); - field* tmp = (alias) ? new field : 0; + field* tmp = (alias) ? new field : nullptr; field& out = (alias) ? (*tmp) : actual_out; // diff --git a/Include/armadillo/armadillo_exts/BandMat_bones.hpp b/Include/armadillo/armadillo_exts/BandMat_bones.hpp deleted file mode 100644 index 3ee1d1ae4..000000000 --- a/Include/armadillo/armadillo_exts/BandMat_bones.hpp +++ /dev/null @@ -1,79 +0,0 @@ -template class BandMat : public Base> { -public: - typedef eT elem_type; - typedef typename get_pod_type::result pod_type; - - static const bool is_row = false; - static const bool is_col = false; - - const uword n_cols; - const uword n_l; - const uword n_u; - const uword n_rows = 2 * n_l + n_u + 1; - -private: - const uword n_s = n_l + n_u; - const uword n_a = n_rows - n_l; - const uword n_b = n_rows - 1; - -public: - const uword n_elem; - const uhword mem_state; - - arma_aligned const eT* - const mem; - - arma_align_mem eT mem_local[arma_config::mat_prealloc]; - - ~BandMat(); - BandMat(); - - explicit BandMat(const uword& in_size, const uword& in_l, const uword& in_u); - - template BandMat(const uword& in_size, const uword& in_l, const uword& in_u, const fill::fill_class& f); - - BandMat& operator=(const eT& val); - BandMat& operator+=(const eT& val); - BandMat& operator-=(const eT& val); - BandMat& operator*=(const eT& val); - BandMat& operator/=(const eT& val); - - BandMat(const BandMat& m); - BandMat& operator=(const BandMat& m); - BandMat& operator+=(const BandMat& m); - BandMat& operator-=(const BandMat& m); - BandMat& operator*=(const BandMat& m) = delete; - BandMat& operator%=(const BandMat& m); - BandMat& operator/=(const BandMat& m); - - template BandMat(const BdOp& X); - - template BandMat& operator=(const BdOp& X); - - eT& at(const uword& in_row, const uword& in_col); - const eT& at(const uword& in_row, const uword& in_col) const; - eT& operator()(const uword& in_row, const uword& in_col); - const eT& operator()(const uword& in_row, const uword& in_col) const; - - eT* memptr(); - const eT* memptr() const; - - void set_size(const uword in_size, const uword& in_l, const uword& in_u); - - arma_hot const BandMat& fill(const eT val); - template arma_hot const BandMat& fill(const fill::fill_class&); - - const BandMat& zeros(); - const BandMat& zeros(const uword in_size, const uword& in_l, const uword& in_u); - - const BandMat& ones(); - const BandMat& ones(const uword in_size, const uword& in_l, const uword& in_u); - - const BandMat& eye(); - const BandMat& eye(const uword in_size, const uword& in_l, const uword& in_u); - - void reset(); - - void init_cold(); - void init_warm(const uword& in_size, const uword& in_l, const uword& in_u); -}; diff --git a/Include/armadillo/armadillo_exts/BandMat_meat.hpp b/Include/armadillo/armadillo_exts/BandMat_meat.hpp deleted file mode 100644 index d366bb4b4..000000000 --- a/Include/armadillo/armadillo_exts/BandMat_meat.hpp +++ /dev/null @@ -1,345 +0,0 @@ -template BandMat::~BandMat() {} - -template BandMat::BandMat() - : n_cols(0) - , n_l(0) - , n_u(0) - , n_elem(0) - , mem_state(0) - , mem() {} - -template BandMat::BandMat(const uword& in_size, const uword& in_l, const uword& in_u) - : n_cols(in_size) - , n_l(in_l) - , n_u(in_u) - , n_elem((in_u + 2 * in_l + 1) * in_size) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint_this(this); - init_cold(); -} - -template template BandMat::BandMat(const uword& in_size, const uword& in_l, const uword& in_u, const fill::fill_class& f) - : n_cols(in_size) - , n_l(in_l) - , n_u(in_u) - , n_elem((in_u + 2 * in_l + 1) * in_size) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint_this(this); - - init_cold(); - - (*this).fill(f); -} - -template BandMat& BandMat::operator=(const eT& val) { - init_warm(1, 0, 0); - access::rw(mem[0]) = val; - return *this; -} - -template BandMat& BandMat::operator+=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_plus(memptr(), val, n_elem); - - return *this; -} - -template BandMat& BandMat::operator-=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_minus(memptr(), val, n_elem); - - return *this; -} - -template BandMat& BandMat::operator*=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_mul(memptr(), val, n_elem); - - return *this; -} - -template BandMat& BandMat::operator/=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_div(memptr(), val, n_elem); - - return *this; -} - -template BandMat::BandMat(const BandMat& m) - : n_cols(m.n_cols) - , n_elem(m.n_elem) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint(arma_str::format("this = %x in_mat = %x") % this % &m); - - init_cold(); - - arrayops::copy(memptr(), m.mem, m.n_elem); -} - -template BandMat& BandMat::operator=(const BandMat& m) { - arma_extra_debug_sigprint(arma_str::format("this = %x in_mat = %x") % this % &m); - - if(this != &m) { - init_warm(m.n_cols, m.n_l, m.n_u); - - arrayops::copy(memptr(), m.mem, m.n_elem); - } - - return *this; -} - -template BandMat& BandMat::operator+=(const BandMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "addition"); - - arrayops::inplace_plus(memptr(), m.memptr(), n_elem); - - return *this; -} - -template BandMat& BandMat::operator-=(const BandMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "subtraction"); - - arrayops::inplace_minus(memptr(), m.memptr(), n_elem); - - return *this; -} - -template BandMat& BandMat::operator%=(const BandMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "element-wise multiplication"); - - arrayops::inplace_mul(memptr(), m.memptr(), n_elem); - - return *this; -} - -template BandMat& BandMat::operator/=(const BandMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "element-wise division"); - - arrayops::inplace_div(memptr(), m.memptr(), n_elem); - - return *this; -} - -template template BandMat::BandMat(const BdOp& X) - : n_cols(0) - , n_l(0) - , n_u(0) - , n_elem(0) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint_this(this); - - arma_type_check((is_same_type::no)); - - bdop_type::apply(*this, X); -} - -template template BandMat& BandMat::operator=(const BdOp& X) { - arma_extra_debug_sigprint(); - - arma_type_check((is_same_type::no)); - - bdop_type::apply(*this, X); - - return *this; -} - -template eT& BandMat::at(const uword& in_row, const uword& in_col) { return access::rw(mem[n_a + n_b * in_col + in_row]); } - -template const eT& BandMat::at(const uword& in_row, const uword& in_col) const { return mem[n_a + n_b * in_col + in_row]; } - -template eT& BandMat::operator()(const uword& in_row, const uword& in_col) { - arma_debug_check(in_row >= n_cols || in_col >= n_cols, "BandMat::operator(): index out of bounds"); - return at(in_row, in_col); -} - -template const eT& BandMat::operator()(const uword& in_row, const uword& in_col) const { - arma_debug_check(in_row >= n_cols || in_col >= n_cols, "BandMat::operator(): index out of bounds"); - return at(in_row, in_col); -} - -template void BandMat::init_cold() { - arma_extra_debug_sigprint(arma_str::format("n_size = %d") % n_cols); - -#if(defined(ARMA_USE_CXX11) || defined(ARMA_64BIT_WORD)) - auto error_message = "BandMat::init(): requested size is too large"; -#else - const char* error_message = "BandMat::init(): requested size is too large; suggest to " - "compile in C++11 mode or enable ARMA_64BIT_WORD"; -#endif - - arma_debug_check(n_cols > ARMA_MAX_UHWORD ? n_elem > ARMA_MAX_UWORD : false, error_message); - - if(n_elem <= arma_config::mat_prealloc) - if(n_elem == 0) access::rw(mem) = NULL; - else { - arma_extra_debug_print("BandMat::init(): using local memory"); - access::rw(mem) = mem_local; - } - else { - arma_extra_debug_print("BandMat::init(): acquiring memory"); - access::rw(mem) = memory::acquire(n_elem); - } -} - -template void BandMat::init_warm(const uword& in_size, const uword& in_l, const uword& in_u) { - arma_extra_debug_sigprint(arma_str::format("in_n_size = %d") % in_size); - - if(n_cols == in_size && n_l == in_l && n_u == in_u) return; - - auto err_state = false; - char* err_msg = nullptr; - - const uhword t_mem_state = mem_state; - - arma_debug_set_error(err_state, err_msg, t_mem_state == 3, "BandMat::init(): size is fixed and hence cannot be changed"); - -#if(defined(ARMA_USE_CXX11) || defined(ARMA_64BIT_WORD)) - auto error_message = "BandMat::init(): requested size is too large"; -#else - const char* error_message = "BandMat::init(): requested size is too large; suggest " - "to compile in C++11 mode or enable ARMA_64BIT_WORD"; -#endif - - arma_debug_set_error(err_state, err_msg, in_size > ARMA_MAX_UHWORD ? (2 * in_l + in_u + 1) * in_size > ARMA_MAX_UWORD : false, error_message); - - arma_debug_check(err_state, err_msg); - - const auto old_n_elem = n_elem; - const auto new_n_elem = (2 * in_l + in_u + 1) * in_size; - - if(old_n_elem == new_n_elem) { arma_extra_debug_print("BandMat::init(): reusing memory"); } - else { - arma_debug_check(t_mem_state == 2, - "BandMat::init(): mismatch between size of " - "auxiliary memory and requested size"); - - if(new_n_elem < old_n_elem) { - if(t_mem_state == 0 && new_n_elem <= arma_config::mat_prealloc) { - if(old_n_elem > arma_config::mat_prealloc) { - arma_extra_debug_print("BandMat::init(): releasing memory"); - memory::release(access::rw(mem)); - } - - access::rw(mem) = new_n_elem == 0 ? NULL : mem_local; - } - else { arma_extra_debug_print("BandMat::init(): reusing memory"); } - } - else { - if(t_mem_state == 0 && old_n_elem > arma_config::mat_prealloc) { - arma_extra_debug_print("BandMat::init(): releasing memory"); - memory::release(access::rw(mem)); - } - - access::rw(mem) = new_n_elem <= arma_config::mat_prealloc ? mem_local : memory::acquire(new_n_elem); - - access::rw(mem_state) = 0; - } - - access::rw(n_cols) = in_size; - access::rw(n_l) = in_l; - access::rw(n_u) = in_u; - access::rw(n_s) = n_l + n_u; - access::rw(n_rows) = 2 * n_l + n_u + 1; - access::rw(n_a) = n_rows - n_l; - access::rw(n_b) = n_rows - 1; - access::rw(n_elem) = new_n_elem; - } -} - -template eT* BandMat::memptr() { return const_cast(mem); } - -template const eT* BandMat::memptr() const { return mem; } - -template void BandMat::set_size(const uword in_size, const uword& in_l, const uword& in_u) { - arma_extra_debug_sigprint(); - - init_warm(in_size, in_l, in_u); -} - -template const BandMat& BandMat::fill(const eT val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_set(memptr(), val, n_elem); - - return *this; -} - -template template const BandMat& BandMat::fill(const fill::fill_class&) { - arma_extra_debug_sigprint(); - - if(is_same_type::yes) (*this).zeros(); - if(is_same_type::yes) (*this).ones(); - if(is_same_type::yes) (*this).eye(); - - return *this; -} - -template const BandMat& BandMat::zeros() { - arma_extra_debug_sigprint(); - - arrayops::fill_zeros(memptr(), n_elem); - - return *this; -} - -template const BandMat& BandMat::zeros(const uword in_size, const uword& in_l, const uword& in_u) { - arma_extra_debug_sigprint(); - - set_size(in_size, in_l, in_u); - - return (*this).zeros(); -} - -template const BandMat& BandMat::ones() { - arma_extra_debug_sigprint(); - - return fill(eT(1)); -} - -template const BandMat& BandMat::ones(const uword in_size, const uword& in_l, const uword& in_u) { - arma_extra_debug_sigprint(); - - set_size(in_size, in_l, in_u); - - return fill(eT(1)); -} - -template const BandMat& BandMat::eye() { - arma_extra_debug_sigprint(); - - (*this).zeros(); - - for(uword ii = 0; ii < n_cols; ++ii) at(ii, ii) = eT(1); - - return *this; -} - -template const BandMat& BandMat::eye(const uword in_size, const uword& in_l, const uword& in_u) { - arma_extra_debug_sigprint(); - - set_size(in_size, in_l, in_u); - - return (*this).eye(); -} - -template void BandMat::reset() { - arma_extra_debug_sigprint(); - - init_warm(0, 0, 0); -} diff --git a/Include/armadillo/armadillo_exts/BdOp_bones.hpp b/Include/armadillo/armadillo_exts/BdOp_bones.hpp deleted file mode 100644 index 463b04f85..000000000 --- a/Include/armadillo/armadillo_exts/BdOp_bones.hpp +++ /dev/null @@ -1,18 +0,0 @@ -template class BdOp : public Base> { -public: - typedef typename T1::elem_type elem_type; - typedef typename get_pod_type::result pod_type; - - static const bool is_row = false; - static const bool is_col = false; - - explicit BdOp(const T1& in_m); - BdOp(const T1& in_m, const elem_type in_aux); - BdOp(const T1& in_m, const uword in_aux_uword_a, const uword in_aux_uword_b); - ~BdOp(); - - arma_aligned const T1& m; //!< storage of reference to the operand (eg. a matrix) - arma_aligned elem_type aux; //!< storage of auxiliary data, user defined format - arma_aligned uword aux_uword_a = 0; //!< storage of auxiliary data, uword format - arma_aligned uword aux_uword_b = 0; //!< storage of auxiliary data, uword format -}; diff --git a/Include/armadillo/armadillo_exts/BdOp_meat.hpp b/Include/armadillo/armadillo_exts/BdOp_meat.hpp deleted file mode 100644 index b549a8482..000000000 --- a/Include/armadillo/armadillo_exts/BdOp_meat.hpp +++ /dev/null @@ -1,13 +0,0 @@ -template BdOp::BdOp(const T1& in_m) - : m(in_m) { arma_extra_debug_sigprint(); } - -template BdOp::BdOp(const T1& in_m, const typename T1::elem_type in_aux) - : m(in_m) - , aux(in_aux) { arma_extra_debug_sigprint(); } - -template BdOp::BdOp(const T1& in_m, const uword in_aux_uword_a, const uword in_aux_uword_b) - : m(in_m) - , aux_uword_a(in_aux_uword_a) - , aux_uword_b(in_aux_uword_b) { arma_extra_debug_sigprint(); } - -template BdOp::~BdOp() { arma_extra_debug_sigprint(); } diff --git a/Include/armadillo/armadillo_exts/SmOp_bones.hpp b/Include/armadillo/armadillo_exts/SmOp_bones.hpp deleted file mode 100644 index 8a1ecc226..000000000 --- a/Include/armadillo/armadillo_exts/SmOp_bones.hpp +++ /dev/null @@ -1,18 +0,0 @@ -template class SmOp : public Base> { -public: - typedef typename T1::elem_type elem_type; - typedef typename get_pod_type::result pod_type; - - static const bool is_row = false; - static const bool is_col = false; - - explicit SmOp(const T1& in_m); - SmOp(const T1& in_m, const elem_type in_aux); - SmOp(const T1& in_m, const uword in_aux_uword_a, const uword in_aux_uword_b); - ~SmOp(); - - arma_aligned const T1& m; //!< storage of reference to the operand (eg. a matrix) - arma_aligned elem_type aux; //!< storage of auxiliary data, user defined format - arma_aligned uword aux_uword_a = 0; //!< storage of auxiliary data, uword format - arma_aligned uword aux_uword_b = 0; //!< storage of auxiliary data, uword format -}; diff --git a/Include/armadillo/armadillo_exts/SmOp_meat.hpp b/Include/armadillo/armadillo_exts/SmOp_meat.hpp deleted file mode 100644 index 4f22b9fd9..000000000 --- a/Include/armadillo/armadillo_exts/SmOp_meat.hpp +++ /dev/null @@ -1,13 +0,0 @@ -template SmOp::SmOp(const T1& in_m) - : m(in_m) { arma_extra_debug_sigprint(); } - -template SmOp::SmOp(const T1& in_m, const typename T1::elem_type in_aux) - : m(in_m) - , aux(in_aux) { arma_extra_debug_sigprint(); } - -template SmOp::SmOp(const T1& in_m, const uword in_aux_uword_a, const uword in_aux_uword_b) - : m(in_m) - , aux_uword_a(in_aux_uword_a) - , aux_uword_b(in_aux_uword_b) { arma_extra_debug_sigprint(); } - -template SmOp::~SmOp() { arma_extra_debug_sigprint(); } diff --git a/Include/armadillo/armadillo_exts/SymmMat_bones.hpp b/Include/armadillo/armadillo_exts/SymmMat_bones.hpp deleted file mode 100644 index 609cca8b9..000000000 --- a/Include/armadillo/armadillo_exts/SymmMat_bones.hpp +++ /dev/null @@ -1,76 +0,0 @@ -template class SymmMat : public Base> { -public: - typedef eT elem_type; - typedef typename get_pod_type::result pod_type; - - static const bool is_row = false; - static const bool is_col = false; - - const uword n_size; - const uword n_elem; - const uhword mem_state; - - arma_aligned const eT* - const mem; - - arma_align_mem eT mem_local[arma_config::mat_prealloc]; - - ~SymmMat(); - SymmMat(); - - explicit SymmMat(const uword& in_size); - explicit SymmMat(const SizeMat& s); - - template SymmMat(const uword& in_size, const fill::fill_class& f); - template SymmMat(const SizeMat& s, const fill::fill_class& f); - - SymmMat(const Mat& in_mat); - - SymmMat& operator=(const eT& val); - SymmMat& operator+=(const eT& val); - SymmMat& operator-=(const eT& val); - SymmMat& operator*=(const eT& val); - SymmMat& operator/=(const eT& val); - - SymmMat(const SymmMat& m); - SymmMat& operator=(const SymmMat& m); - SymmMat& operator+=(const SymmMat& m); - SymmMat& operator-=(const SymmMat& m); - SymmMat& operator*=(const SymmMat& m) = delete; - SymmMat& operator%=(const SymmMat& m); - SymmMat& operator/=(const SymmMat& m); - - template SymmMat(const SmOp& X); - - template SymmMat& operator=(const Glue& X); - template SymmMat& operator=(const SmOp& X); - - eT& at(const uword& in_row, const uword& in_col); - const eT& at(const uword& in_row, const uword& in_col) const; - eT& operator()(const uword& in_row, const uword& in_col); - const eT& operator()(const uword& in_row, const uword& in_col) const; - - eT* memptr(); - const eT* memptr() const; - - void set_size(const uword in_size); - - arma_hot const SymmMat& fill(const eT val); - template arma_hot const SymmMat& fill(const fill::fill_class&); - - const SymmMat& zeros(); - const SymmMat& zeros(const uword in_size); - - const SymmMat& ones(); - const SymmMat& ones(const uword in_size); - - const SymmMat& eye(); - const SymmMat& eye(const uword in_size); - - void reset(); - - void init_cold(); - void init_warm(const uword& in_size); - - void print() const; -}; diff --git a/Include/armadillo/armadillo_exts/SymmMat_meat.hpp b/Include/armadillo/armadillo_exts/SymmMat_meat.hpp deleted file mode 100644 index 681ff8e99..000000000 --- a/Include/armadillo/armadillo_exts/SymmMat_meat.hpp +++ /dev/null @@ -1,429 +0,0 @@ -template SymmMat::~SymmMat() {} - -template SymmMat::SymmMat() - : n_size(0) - , n_elem(0) - , mem_state(0) - , mem() {} - -template SymmMat::SymmMat(const uword& in_size) - : n_size(in_size) - , n_elem((in_size + 1) * in_size / 2) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint_this(this); - init_cold(); -} - -template SymmMat::SymmMat(const SizeMat& s) - : n_size(s.n_rows) - , n_elem((s.n_rows + 1) * s.n_cols / 2) - , mem_state(0) - , mem() { - arma_debug_check(s.n_rows != s.n_cols, "SymmMat() only accepts sqaure matrix."); - - init_cold(); -} - -template template SymmMat::SymmMat(const uword& in_size, const fill::fill_class& f) - : n_size(in_size) - , n_elem((in_size + 1) * in_size / 2) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint_this(this); - - init_cold(); - - (*this).fill(f); -} - -template template SymmMat::SymmMat(const SizeMat& s, const fill::fill_class& f) - : n_size(s.n_rows) - , n_elem((s.n_rows + 1) * s.n_cols / 2) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint_this(this); - - arma_debug_check(s.n_rows != s.n_cols, "SymmMat() only accepts sqaure matrix."); - - init_cold(); - - (*this).fill(f); -} - -template SymmMat::SymmMat(const Mat& in_mat) - : n_size(in_mat.n_rows) - , n_elem((in_mat.n_rows + 1) * in_mat.n_cols / 2) - , mem_state(0) - , mem() { - arma_debug_check(in_mat.n_rows != in_mat.n_cols, "SymmMat() only accepts sqaure matrix."); - - init_cold(); - - auto tmp_ptr = const_cast(mem); - for(auto j = 0; j < n_size; ++j) for(auto i = 0; i <= j; ++i) *tmp_ptr++ = in_mat(i, j); -} - -template SymmMat& SymmMat::operator=(const eT& val) { - init_warm(1); - access::rw(mem[0]) = val; - return *this; -} - -template SymmMat& SymmMat::operator+=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_plus(memptr(), val, n_elem); - - return *this; -} - -template SymmMat& SymmMat::operator-=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_minus(memptr(), val, n_elem); - - return *this; -} - -template SymmMat& SymmMat::operator*=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_mul(memptr(), val, n_elem); - - return *this; -} - -template SymmMat& SymmMat::operator/=(const eT& val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_div(memptr(), val, n_elem); - - return *this; -} - -template SymmMat::SymmMat(const SymmMat& m) - : n_size(m.n_size) - , n_elem(m.n_elem) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint(arma_str::format("this = %x in_mat = %x") % this % &m); - - init_cold(); - - arrayops::copy(memptr(), m.mem, m.n_elem); -} - -template SymmMat& SymmMat::operator=(const SymmMat& m) { - arma_extra_debug_sigprint(arma_str::format("this = %x in_mat = %x") % this % &m); - - if(this != &m) { - init_warm(m.n_size); - - arrayops::copy(memptr(), m.mem, m.n_elem); - } - - return *this; -} - -template SymmMat& SymmMat::operator+=(const SymmMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "addition"); - - arrayops::inplace_plus(memptr(), m.memptr(), n_elem); - - return *this; -} - -template SymmMat& SymmMat::operator-=(const SymmMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "subtraction"); - - arrayops::inplace_minus(memptr(), m.memptr(), n_elem); - - return *this; -} - -template SymmMat& SymmMat::operator%=(const SymmMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "element-wise multiplication"); - - arrayops::inplace_mul(memptr(), m.memptr(), n_elem); - - return *this; -} - -template SymmMat& SymmMat::operator/=(const SymmMat& m) { - arma_extra_debug_sigprint(); - - arma_debug_assert_same_size(*this, m, "element-wise division"); - - arrayops::inplace_div(memptr(), m.memptr(), n_elem); - - return *this; -} - -template template SymmMat::SymmMat(const SmOp& X) - : n_size(0) - , n_elem(0) - , mem_state(0) - , mem() { - arma_extra_debug_sigprint_this(this); - - arma_type_check((is_same_type::no)); - - smop_type::apply(*this, X); -} - -template template SymmMat& SymmMat::operator=(const Glue& X) { - arma_extra_debug_sigprint(); - - arma_type_check((is_same_type::no)); - arma_type_check((is_same_type::no)); - - glue_type::apply(*this, X); - - return *this; -} - -template template SymmMat& SymmMat::operator=(const SmOp& X) { - arma_extra_debug_sigprint(); - - arma_type_check((is_same_type::no)); - - smop_type::apply(*this, X); - - return *this; -} - -template eT& SymmMat::at(const uword& in_row, const uword& in_col) { - const auto tmp_loc = in_col > in_row ? (in_col * in_col + in_col) / 2 + in_row : (in_row * in_row + in_row) / 2 + in_col; - - return access::rw(mem[tmp_loc]); -} - -template const eT& SymmMat::at(const uword& in_row, const uword& in_col) const { - const auto tmp_loc = in_col > in_row ? (in_col * in_col + in_col) / 2 + in_row : (in_row * in_row + in_row) / 2 + in_col; - - return mem[tmp_loc]; -} - -template eT& SymmMat::operator()(const uword& in_row, const uword& in_col) { - arma_debug_check(in_row >= n_size || in_col >= n_size, "SymmMat::operator(): index out of bounds"); - return at(in_row, in_col); -} - -template const eT& SymmMat::operator()(const uword& in_row, const uword& in_col) const { - arma_debug_check(in_row >= n_size || in_col >= n_size, "SymmMat::operator(): index out of bounds"); - return at(in_row, in_col); -} - -template void SymmMat::init_cold() { - arma_extra_debug_sigprint(arma_str::format("n_size = %d") % n_size); - -#if(defined(ARMA_USE_CXX11) || defined(ARMA_64BIT_WORD)) - auto error_message = "SymmMat::init(): requested size is too large"; -#else - const char* error_message = "SymmMat::init(): requested size is too large; suggest to compile in C++11 mode or enable ARMA_64BIT_WORD"; -#endif - - arma_debug_check(n_size > ARMA_MAX_UHWORD ? n_elem > ARMA_MAX_UWORD : false, error_message); - - if(n_elem <= arma_config::mat_prealloc) - if(n_elem == 0) access::rw(mem) = NULL; - else { - arma_extra_debug_print("SymmMat::init(): using local memory"); - access::rw(mem) = mem_local; - } - else { - arma_extra_debug_print("SymmMat::init(): acquiring memory"); - access::rw(mem) = memory::acquire(n_elem); - } -} - -template void SymmMat::init_warm(const uword& in_size) { - arma_extra_debug_sigprint(arma_str::format("in_n_size = %d") % in_size); - - if(n_size == in_size) return; - - auto err_state = false; - char* err_msg = nullptr; - - const uhword t_mem_state = mem_state; - - arma_debug_set_error(err_state, err_msg, t_mem_state == 3, "SymmMat::init(): size is fixed and hence cannot be changed"); - -#if(defined(ARMA_USE_CXX11) || defined(ARMA_64BIT_WORD)) - auto error_message = "SymmMat::init(): requested size is too large"; -#else - const char* error_message = "SymmMat::init(): requested size is too large; suggest to compile in C++11 mode or enable ARMA_64BIT_WORD"; -#endif - - arma_debug_set_error(err_state, err_msg, in_size > ARMA_MAX_UHWORD ? (in_size + 1) * in_size / 2 > ARMA_MAX_UWORD : false, error_message); - - arma_debug_check(err_state, err_msg); - - const auto old_n_elem = n_elem; - const auto new_n_elem = (in_size + 1) * in_size / 2; - - if(old_n_elem == new_n_elem) { arma_extra_debug_print("SymmMat::init(): reusing memory"); } - else { - arma_debug_check(t_mem_state == 2, "SymmMat::init(): mismatch between size of auxiliary memory and requested size"); - - if(new_n_elem < old_n_elem) { - if(t_mem_state == 0 && new_n_elem <= arma_config::mat_prealloc) { - if(old_n_elem > arma_config::mat_prealloc) { - arma_extra_debug_print("SymmMat::init(): releasing memory"); - memory::release(access::rw(mem)); - } - - access::rw(mem) = new_n_elem == 0 ? NULL : mem_local; - } - else { arma_extra_debug_print("SymmMat::init(): reusing memory"); } - } - else { - if(t_mem_state == 0 && old_n_elem > arma_config::mat_prealloc) { - arma_extra_debug_print("SymmMat::init(): releasing memory"); - memory::release(access::rw(mem)); - } - - access::rw(mem) = new_n_elem <= arma_config::mat_prealloc ? mem_local : memory::acquire(new_n_elem); - - access::rw(mem_state) = 0; - } - - access::rw(n_size) = in_size; - access::rw(n_elem) = new_n_elem; - } -} - -template void SymmMat::print() const { - auto& o = std::cout; - - const auto save_flags = o.flags(); - const auto save_precision = o.precision(); - - o.unsetf(ios::scientific); - o.setf(ios::fixed); - o.precision(4); - - for(auto i = 0; i < n_size; i++) { - for(auto j = 0; j < n_size; j++) { - o.width(8); - o << at(i, j) << " "; - } - o << endl; - } - - o.flags(save_flags); - o.precision(save_precision); -} - -template eT* SymmMat::memptr() { return const_cast(mem); } - -template const eT* SymmMat::memptr() const { return mem; } - -template void SymmMat::set_size(const uword in_size) { - arma_extra_debug_sigprint(); - - init_warm(in_size); -} - -template const SymmMat& SymmMat::fill(const eT val) { - arma_extra_debug_sigprint(); - - arrayops::inplace_set(memptr(), val, n_elem); - - return *this; -} - -template template const SymmMat& SymmMat::fill(const fill::fill_class&) { - arma_extra_debug_sigprint(); - - if(is_same_type::yes) (*this).zeros(); - else if(is_same_type::yes) (*this).ones(); - else if(is_same_type::yes) (*this).eye(); - - return *this; -} - -template const SymmMat& SymmMat::zeros() { - arma_extra_debug_sigprint(); - - arrayops::fill_zeros(memptr(), n_elem); - - return *this; -} - -template const SymmMat& SymmMat::zeros(const uword in_size) { - arma_extra_debug_sigprint(); - - set_size(in_size); - - return (*this).zeros(); -} - -template const SymmMat& SymmMat::ones() { - arma_extra_debug_sigprint(); - - return fill(eT(1)); -} - -template const SymmMat& SymmMat::ones(const uword in_size) { - arma_extra_debug_sigprint(); - - set_size(in_size); - - return fill(eT(1)); -} - -template const SymmMat& SymmMat::eye() { - arma_extra_debug_sigprint(); - - (*this).zeros(); - - for(uword ii = 0; ii < n_size; ++ii) at(ii, ii) = eT(1); - - return *this; -} - -template const SymmMat& SymmMat::eye(const uword in_size) { - arma_extra_debug_sigprint(); - - set_size(in_size); - - return (*this).eye(); -} - -template void SymmMat::reset() { - arma_extra_debug_sigprint(); - - init_warm(0); -} - -template int sp_solve(Col& X, SymmMat& A, const Col& B) { - X = B; - - auto UPLO = 'U'; - auto N = static_cast(A.n_size); - auto NRHS = 1; - const auto IPIV = new int[N]; - auto LDB = N; - auto INFO = 0; - - if(is_float::value) { - using T = float; - arma_fortran(arma_sspsv)(&UPLO, &N, &NRHS, (T*)A.memptr(), IPIV, (T*)X.memptr(), &LDB, &INFO); - } - else if(is_double::value) { - using T = double; - arma_fortran(arma_dspsv)(&UPLO, &N, &NRHS, (T*)A.memptr(), IPIV, (T*)X.memptr(), &LDB, &INFO); - } - - delete[] IPIV; - - return INFO; -} diff --git a/Include/armadillo/armadillo_exts/debug.hpp b/Include/armadillo/armadillo_exts/debug.hpp deleted file mode 100644 index 7b8324e66..000000000 --- a/Include/armadillo/armadillo_exts/debug.hpp +++ /dev/null @@ -1,9 +0,0 @@ -template -arma_hot - -void arma_assert_same_size(const SymmMat& A, const SymmMat& B, const char* x) { - const uword A_n_size = A.n_size; - const uword B_n_size = B.n_size; - - if(A_n_size != B_n_size) arma_stop_logic_error(arma_incompat_size_string(A_n_size, A_n_size, B_n_size, B_n_size, x)); -} diff --git a/Include/armadillo/armadillo_exts/def_lapack.hpp b/Include/armadillo/armadillo_exts/def_lapack.hpp index d923f58fd..600f6b52f 100644 --- a/Include/armadillo/armadillo_exts/def_lapack.hpp +++ b/Include/armadillo/armadillo_exts/def_lapack.hpp @@ -10,8 +10,6 @@ #define arma_sspmv SSPMV #define arma_dspmv DSPMV -#define arma_sspmm SSPMM -#define arma_dspmm DSPMM #define arma_dsysv DSYSV #define arma_dsygvx DSYGVX @@ -54,8 +52,6 @@ #define arma_sspmv sspmv #define arma_dspmv dspmv -#define arma_sspmm sspmm -#define arma_dspmm dspmm #define arma_dsysv dsysv #define arma_dsygvx dsygvx @@ -103,10 +99,6 @@ void arma_fortran(arma_sspmv)(const char* UPLO, const int* N, const float* ALPHA void arma_fortran(arma_dspmv)(const char* UPLO, const int* N, const double* ALPHA, const double* AP, const double* X, const int* INCX, const double* BETA, double* Y, const int* INCY); -void arma_fortran(arma_sspmm)(const char* SIDE, const char* UPLO, const char* TRAN, const int* M, const int* N, const float* A, const float* ALPHA, const float* B, const int* LDB, const float* BETA, float* C, const int* LDC); - -void arma_fortran(arma_dspmm)(const char* SIDE, const char* UPLO, const char* TRAN, const int* M, const int* N, const double* A, const double* ALPHA, const double* B, const int* LDB, const double* BETA, double* C, const int* LDC); - // symmetric matrix void arma_fortran(arma_dsysv)(char* UPLO, int* N, int* NRHS, double* A, int* LDA, int* IPIV, double* B, int* LDB, double* WORK, int* LWORK, int* INFO); diff --git a/Include/armadillo/armadillo_exts/fn_inv.hpp b/Include/armadillo/armadillo_exts/fn_inv.hpp deleted file mode 100644 index fed8cefc9..000000000 --- a/Include/armadillo/armadillo_exts/fn_inv.hpp +++ /dev/null @@ -1,7 +0,0 @@ -template arma_warn_unused - -typename enable_if2::value && is_SymmMat::value, const SmOp>::result inv(const Base& X) { - arma_extra_debug_sigprint(); - - return SmOp(X.get_ref()); -} diff --git a/Include/armadillo/armadillo_exts/fn_solve.hpp b/Include/armadillo/armadillo_exts/fn_solve.hpp deleted file mode 100644 index 7a8486835..000000000 --- a/Include/armadillo/armadillo_exts/fn_solve.hpp +++ /dev/null @@ -1,10 +0,0 @@ -template typename enable_if2::value &&is_SymmMat::value, - - -const Glue -> -::result solve(const T1& A, const Base& B) { - arma_extra_debug_sigprint(); - - return Glue(A.get_ref(), B.get_ref()); -} diff --git a/Include/armadillo/armadillo_exts/glue_solve_bones.hpp b/Include/armadillo/armadillo_exts/glue_solve_bones.hpp deleted file mode 100644 index cf5a2dec9..000000000 --- a/Include/armadillo/armadillo_exts/glue_solve_bones.hpp +++ /dev/null @@ -1,32 +0,0 @@ -class glue_solve_symm { -public: - template static void apply(Mat& X, const Glue& S) { - arma_extra_debug_sigprint(); - - if(glue_solve_symm::apply(X, S.A, S.B) == false) arma_stop_runtime_error("solve(): solution not found"); - } - - template static bool apply(Mat& X, const T1& A, const T2& B) { - auto UPLO = 'U'; - auto N = static_cast(A.n_size); - auto NRHS = static_cast(B.n_cols); - const auto IPIV = new int[N]; - auto LDB = N; - auto INFO = 0; - - X = Mat(B.memptr(), N, NRHS); - - if(is_float::value) { - using T = float; - arma_fortran(arma_sspsv)(&UPLO, &N, &NRHS, (T*)A.memptr(), IPIV, (T*)X.memptr(), &LDB, &INFO); - } - else if(is_double::value) { - using T = double; - arma_fortran(arma_dspsv)(&UPLO, &N, &NRHS, (T*)A.memptr(), IPIV, (T*)X.memptr(), &LDB, &INFO); - } - - delete[] IPIV; - - return INFO == 0; - } -}; diff --git a/Include/armadillo/armadillo_exts/glue_solve_meat.hpp b/Include/armadillo/armadillo_exts/glue_solve_meat.hpp deleted file mode 100644 index e69de29bb..000000000 diff --git a/Include/armadillo/armadillo_exts/glue_times_bones.hpp b/Include/armadillo/armadillo_exts/glue_times_bones.hpp deleted file mode 100644 index 7c92e41e9..000000000 --- a/Include/armadillo/armadillo_exts/glue_times_bones.hpp +++ /dev/null @@ -1,72 +0,0 @@ -class glue_times_symm { -public: - template - arma_hot - static - - void apply(Mat& out, const Glue& I) { - typedef typename T1::elem_type eT; - auto& A = I.A; - auto& X = I.B; - - out = X; - - auto UPLO = 'U'; - auto N = static_cast(A.n_size); - eT ALPHA = 1.; - auto INC = 1; - eT BETA = 0.; - - if(is_float::value) { - using T = float; - arma_fortran(arma_sspmv)(&UPLO, &N, (T*)&ALPHA, (T*)A.memptr(), (T*)X.memptr(), &INC, (T*)&BETA, (T*)out.memptr(), &INC); - } - else if(is_double::value) { - using T = double; - arma_fortran(arma_dspmv)(&UPLO, &N, (T*)&ALPHA, (T*)A.memptr(), (T*)X.memptr(), &INC, (T*)&BETA, (T*)out.memptr(), &INC); - } - } -}; - -class glue_mixed_times_symm { -public: - template inline static void apply(Mat::eT>& out, const mtGlue::eT, T1, T2, glue_mixed_times_symm>& X) { - arma_extra_debug_sigprint(); - - typedef typename T1::elem_type eT1; - typedef typename T2::elem_type eT2; - - const unwrap_check_mixed tmp1(X.A, out); - const unwrap_check_mixed tmp2(X.B, out); - - const Mat& A = tmp1.M; - const Mat& B = tmp2.M; - - arma_debug_assert_mul_size(A, B, "matrix multiplication"); - - out.set_size(A.n_rows, B.n_cols); - - gemm_mixed<>::apply(out, A, B); - } -}; - -template inline Col sp_mv(const SymmMat& A, const Col& X) { - auto Y = X; - - auto UPLO = 'U'; - auto N = static_cast(A.n_size); - eT ALPHA = 1.; - auto INC = 1; - eT BETA = 0.; - - if(is_float::value) { - using T = float; - arma_fortran(arma_sspmv)(&UPLO, &N, (T*)&ALPHA, (T*)A.memptr(), (T*)X.memptr(), &INC, (T*)&BETA, (T*)Y.memptr(), &INC); - } - else if(is_double::value) { - using T = double; - arma_fortran(arma_dspmv)(&UPLO, &N, (T*)&ALPHA, (T*)A.memptr(), (T*)X.memptr(), &INC, (T*)&BETA, (T*)Y.memptr(), &INC); - } - - return Y; -}; diff --git a/Include/armadillo/armadillo_exts/glue_times_meat.hpp b/Include/armadillo/armadillo_exts/glue_times_meat.hpp deleted file mode 100644 index e69de29bb..000000000 diff --git a/Include/armadillo/armadillo_exts/operator_times.hpp b/Include/armadillo/armadillo_exts/operator_times.hpp deleted file mode 100644 index 9cdc5fe4d..000000000 --- a/Include/armadillo/armadillo_exts/operator_times.hpp +++ /dev/null @@ -1,61 +0,0 @@ -template typename enable_if2::value, const eOp>::result operator*(const T1& X, const typename T1::elem_type k) { - arma_extra_debug_sigprint(); - - return eOp(X, k); -} - -template typename enable_if2::value, const eOp>::result operator*(const typename T1::elem_type k, const T1& X) { - arma_extra_debug_sigprint(); - - return eOp(X, k); -} - -template typename enable_if2::value && is_Mat::value && is_same_type < typename T1::elem_type, typename T2::elem_type>::value -, -const Glue -> -::result operator*(const T1& X, const T2& Y) { - arma_extra_debug_sigprint(); - - return Glue(X, Y); -} - -template typename enable_if2::value && is_SymmMat::value && is_same_type < typename T1::elem_type, typename T2::elem_type>::value -, -const Glue -> -::result operator*(const T1& X, const T2& Y) { - arma_extra_debug_sigprint(); - - return Glue(X, Y); -} - -template inline typename enable_if2<(is_SymmMat::value && is_arma_type::value && (is_same_type < typename T1::elem_type, typename T2::elem_type > ::no)), const mtGlue::result, T1, T2, glue_mixed_times_symm> -> -::result operator*(const T1& X, const T2& Y) { - arma_extra_debug_sigprint(); - - typedef typename T1::elem_type eT1; - typedef typename T2::elem_type eT2; - - typedef typename promote_type::result out_eT; - - promote_type::check(); - - return mtGlue(X, Y); -} - -template inline typename enable_if2<(is_arma_type::value && is_SymmMat::value && (is_same_type < typename T1::elem_type, typename T2::elem_type > ::no)), const mtGlue::result, T1, T2, glue_mixed_times_symm> -> -::result operator*(const T1& X, const T2& Y) { - arma_extra_debug_sigprint(); - - typedef typename T1::elem_type eT1; - typedef typename T2::elem_type eT2; - - typedef typename promote_type::result out_eT; - - promote_type::check(); - - return mtGlue(X, Y); -} diff --git a/Include/armadillo/armadillo_exts/smop_inv_bones.hpp b/Include/armadillo/armadillo_exts/smop_inv_bones.hpp deleted file mode 100644 index bdfab3320..000000000 --- a/Include/armadillo/armadillo_exts/smop_inv_bones.hpp +++ /dev/null @@ -1,47 +0,0 @@ -class smop_inv { -public: - template static void apply(T1& out, const SmOp& in) { - arma_extra_debug_sigprint(); - - out = in.m; - - if(sp_inv(out) != 0) { - out.reset(); - arma_stop_runtime_error("sp_inv(): matrix seems singular"); - } - } -}; - -template int sp_inv(SymmMat& A) { - auto UPLO = 'U'; - auto N = static_cast(A.n_size); - const auto IPIV = new int[N]; - auto INFO = 0; - - if(is_float::value) { - using T = float; - arma_fortran(arma_ssptrf)(&UPLO, &N, (T*)A.memptr(), IPIV, &INFO); - } - else if(is_double::value) { - using T = double; - arma_fortran(arma_dsptrf)(&UPLO, &N, (T*)A.memptr(), IPIV, &INFO); - } - - if(INFO != 0) return INFO; - - const auto WORK = new eT[N]; - - if(is_float::value) { - using T = float; - arma_fortran(arma_ssptri)(&UPLO, &N, (T*)A.memptr(), IPIV, (T*)WORK, &INFO); - } - else if(is_double::value) { - using T = double; - arma_fortran(arma_dsptri)(&UPLO, &N, (T*)A.memptr(), IPIV, (T*)WORK, &INFO); - } - - delete[] WORK; - delete[] IPIV; - - return INFO; -} diff --git a/Include/armadillo/armadillo_exts/smop_inv_meat.hpp b/Include/armadillo/armadillo_exts/smop_inv_meat.hpp deleted file mode 100644 index e69de29bb..000000000 diff --git a/Include/armadillo/armadillo_exts/traits.hpp b/Include/armadillo/armadillo_exts/traits.hpp deleted file mode 100644 index 4c0b602ac..000000000 --- a/Include/armadillo/armadillo_exts/traits.hpp +++ /dev/null @@ -1,17 +0,0 @@ -template struct is_SymmMat { - static const bool value = false; -}; - -template struct is_SymmMat> { - static const bool value = true; -}; - -// template -// struct is_BandMat { -// static const bool value = false; -//}; -// -// template -// struct is_BandMat> { -// static const bool value = true; -//}; diff --git a/Include/armadillo/armadillo_exts/typedef_mat.hpp b/Include/armadillo/armadillo_exts/typedef_mat.hpp deleted file mode 100644 index e69de29bb..000000000 diff --git a/Include/armadillo/armadillo_shadow b/Include/armadillo/armadillo_shadow index 487c45b30..7bd9984ce 100644 --- a/Include/armadillo/armadillo_shadow +++ b/Include/armadillo/armadillo_shadow @@ -1,32 +1 @@ #include "armadillo_exts/def_lapack.hpp" - -//#include "armadillo_exts/BdOp_bones.hpp" -//#include "armadillo_exts/SmOp_bones.hpp" - -//#include "armadillo_exts/BandMat_bones.hpp" -//#include "armadillo_exts/SymmMat_bones.hpp" - -//#include "armadillo_exts/glue_solve_bones.hpp" -//#include "armadillo_exts/glue_times_bones.hpp" - -//#include "armadillo_exts/traits.hpp" - -//#include "armadillo_exts/BdOp_meat.hpp" -//#include "armadillo_exts/SmOp_meat.hpp" - -//#include "armadillo_exts/BandMat_meat.hpp" -//#include "armadillo_exts/SymmMat_meat.hpp" - -//#include "armadillo_exts/glue_solve_meat.hpp" -//#include "armadillo_exts/glue_times_meat.hpp" - -//#include "armadillo_exts/smop_inv_bones.hpp" -//#include "armadillo_exts/smop_inv_meat.hpp" - -//#include "armadillo_exts/debug.hpp" -//#include "armadillo_exts/typedef_mat.hpp" - -//#include "armadillo_exts/operator_times.hpp" - -//#include "armadillo_exts/fn_inv.hpp" -//#include "armadillo_exts/fn_solve.hpp" diff --git a/Include/catch/catch.hpp b/Include/catch/catch.hpp index d2a12427b..9b309bddc 100644 --- a/Include/catch/catch.hpp +++ b/Include/catch/catch.hpp @@ -1,6 +1,6 @@ /* - * Catch v2.13.9 - * Generated: 2022-04-12 22:37:23.260201 + * Catch v2.13.10 + * Generated: 2022-10-16 11:01:23.452308 * ---------------------------------------------------------- * This file has been merged from multiple headers. Please don't edit it directly * Copyright (c) 2022 Two Blue Cubes Ltd. All rights reserved. @@ -15,7 +15,7 @@ #define CATCH_VERSION_MAJOR 2 #define CATCH_VERSION_MINOR 13 -#define CATCH_VERSION_PATCH 9 +#define CATCH_VERSION_PATCH 10 #ifdef __clang__ # pragma clang system_header @@ -7395,8 +7395,6 @@ namespace Catch { template struct ObjectStorage { - using TStorage = typename std::aligned_storage::value>::type; - ObjectStorage() : data() {} ObjectStorage(const ObjectStorage& other) @@ -7439,7 +7437,7 @@ namespace Catch { return *static_cast(static_cast(&data)); } - TStorage data; + struct { alignas(T) unsigned char data[sizeof(T)]; } data; }; } @@ -7949,7 +7947,7 @@ namespace Catch { #if defined(__i386__) || defined(__x86_64__) #define CATCH_TRAP() __asm__("int $3\n" : : ) /* NOLINT */ #elif defined(__aarch64__) - #define CATCH_TRAP() __asm__(".inst 0xd4200000") + #define CATCH_TRAP() __asm__(".inst 0xd43e0000") #endif #elif defined(CATCH_PLATFORM_IPHONE) @@ -13558,7 +13556,7 @@ namespace Catch { // Handle list request if( Option listed = list( m_config ) ) - return static_cast( *listed ); + return (std::min) (MaxExitCode, static_cast(*listed)); TestGroup tests { m_config }; auto const totals = tests.execute(); @@ -15391,7 +15389,7 @@ namespace Catch { } Version const& libraryVersion() { - static Version version( 2, 13, 9, "", 0 ); + static Version version( 2, 13, 10, "", 0 ); return version; } @@ -17526,12 +17524,20 @@ namespace Catch { #ifndef __OBJC__ +#ifndef CATCH_INTERNAL_CDECL +#ifdef _MSC_VER +#define CATCH_INTERNAL_CDECL __cdecl +#else +#define CATCH_INTERNAL_CDECL +#endif +#endif + #if defined(CATCH_CONFIG_WCHAR) && defined(CATCH_PLATFORM_WINDOWS) && defined(_UNICODE) && !defined(DO_NOT_USE_WMAIN) // Standard C/C++ Win32 Unicode wmain entry point -extern "C" int wmain (int argc, wchar_t * argv[], wchar_t * []) { +extern "C" int CATCH_INTERNAL_CDECL wmain (int argc, wchar_t * argv[], wchar_t * []) { #else // Standard C/C++ main entry point -int main (int argc, char * argv[]) { +int CATCH_INTERNAL_CDECL main (int argc, char * argv[]) { #endif return Catch::Session().run( argc, argv ); diff --git a/Libs/win/libfext.dll b/Libs/win/libfext.dll index 05a235bca..ce5476ad9 100644 Binary files a/Libs/win/libfext.dll and b/Libs/win/libfext.dll differ diff --git a/Libs/win/libfext.lib b/Libs/win/libfext.lib index 8a7b2206f..bb3a0df0a 100644 Binary files a/Libs/win/libfext.lib and b/Libs/win/libfext.lib differ diff --git a/Libs/win/libmetis.dll b/Libs/win/libmetis.dll index 527d1d52a..28c9f77b6 100644 Binary files a/Libs/win/libmetis.dll and b/Libs/win/libmetis.dll differ diff --git a/Libs/win/libmetis.lib b/Libs/win/libmetis.lib index c02568a4c..a9206a4e4 100644 Binary files a/Libs/win/libmetis.lib and b/Libs/win/libmetis.lib differ diff --git a/MSVC/suanPan/ElementExample/ElementExample.vcxproj b/MSVC/suanPan/ElementExample/ElementExample.vcxproj index 314350ae5..3f77cbeeb 100644 --- a/MSVC/suanPan/ElementExample/ElementExample.vcxproj +++ b/MSVC/suanPan/ElementExample/ElementExample.vcxproj @@ -17,7 +17,7 @@ DynamicLibrary true - v142 + v143 Unicode x64 diff --git a/MSVC/suanPan/MaterialExample/MaterialExample.vcxproj b/MSVC/suanPan/MaterialExample/MaterialExample.vcxproj index b391ccba3..b39b0de61 100644 --- a/MSVC/suanPan/MaterialExample/MaterialExample.vcxproj +++ b/MSVC/suanPan/MaterialExample/MaterialExample.vcxproj @@ -17,7 +17,7 @@ DynamicLibrary true - v142 + v143 Unicode x64 diff --git a/MSVC/suanPan/ModifierExample/ModifierExample.vcxproj b/MSVC/suanPan/ModifierExample/ModifierExample.vcxproj index 32c1ac984..d409d7de1 100644 --- a/MSVC/suanPan/ModifierExample/ModifierExample.vcxproj +++ b/MSVC/suanPan/ModifierExample/ModifierExample.vcxproj @@ -17,7 +17,7 @@ DynamicLibrary true - v142 + v143 Unicode x64 diff --git a/MSVC/suanPan/ModuleBundle/ModuleBundle.vcxproj b/MSVC/suanPan/ModuleBundle/ModuleBundle.vcxproj index a48c9f213..05d2a0b91 100644 --- a/MSVC/suanPan/ModuleBundle/ModuleBundle.vcxproj +++ b/MSVC/suanPan/ModuleBundle/ModuleBundle.vcxproj @@ -17,7 +17,7 @@ DynamicLibrary true - v142 + v143 Unicode x64 diff --git a/MSVC/suanPan/SectionExample/SectionExample.vcxproj b/MSVC/suanPan/SectionExample/SectionExample.vcxproj index 7fbdfba8c..4b8c26a98 100644 --- a/MSVC/suanPan/SectionExample/SectionExample.vcxproj +++ b/MSVC/suanPan/SectionExample/SectionExample.vcxproj @@ -17,7 +17,7 @@ DynamicLibrary true - v142 + v143 Unicode x64 diff --git a/MSVC/suanPan/arpack/arpack.vfproj b/MSVC/suanPan/arpack/arpack.vfproj index 3a37d50e8..11b23e889 100644 --- a/MSVC/suanPan/arpack/arpack.vfproj +++ b/MSVC/suanPan/arpack/arpack.vfproj @@ -14,6 +14,7 @@ + @@ -26,9 +27,7 @@ - - @@ -53,14 +52,9 @@ - - - - - + - @@ -84,7 +78,6 @@ - @@ -97,5 +90,6 @@ - + + diff --git a/MSVC/suanPan/metis/metis.vcxproj b/MSVC/suanPan/metis/metis.vcxproj index 49f10cc51..b943df6ed 100644 --- a/MSVC/suanPan/metis/metis.vcxproj +++ b/MSVC/suanPan/metis/metis.vcxproj @@ -21,7 +21,7 @@ StaticLibrary false - v142 + v143 true Unicode x64 @@ -29,7 +29,7 @@ StaticLibrary true - v142 + v143 Unicode x64 diff --git a/MSVC/suanPan/mumps_c/mumps_c.vcxproj b/MSVC/suanPan/mumps_c/mumps_c.vcxproj index 467c11bdd..162b5469b 100644 --- a/MSVC/suanPan/mumps_c/mumps_c.vcxproj +++ b/MSVC/suanPan/mumps_c/mumps_c.vcxproj @@ -17,7 +17,7 @@ StaticLibrary false - v142 + v143 true Unicode x64 diff --git a/MSVC/suanPan/suanPan.sln b/MSVC/suanPan/suanPan.sln index 791cf894f..2f53a5217 100644 --- a/MSVC/suanPan/suanPan.sln +++ b/MSVC/suanPan/suanPan.sln @@ -1,12 +1,11 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.1.32407.343 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.33027.164 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "suanPan", "suanPan\suanPan.vcxproj", "{1AC2C10B-9DDD-4178-82A4-70C309C305BF}" ProjectSection(ProjectDependencies) = postProject {C8479E1D-A9C4-4FE9-B283-3F6317F9ECF5} = {C8479E1D-A9C4-4FE9-B283-3F6317F9ECF5} - {6540EB29-E749-431D-AB70-3A5AA4053834} = {6540EB29-E749-431D-AB70-3A5AA4053834} {5270594C-8360-4FB3-BF8D-49BF1E51C5FE} = {5270594C-8360-4FB3-BF8D-49BF1E51C5FE} {E75FB1D9-6EB8-4C3B-8130-7B1CBE5EA7C1} = {E75FB1D9-6EB8-4C3B-8130-7B1CBE5EA7C1} {C41240E3-D993-440D-B994-4C721A27BC51} = {C41240E3-D993-440D-B994-4C721A27BC51} @@ -20,8 +19,6 @@ Project("{6989167D-11E4-40FE-8C1A-2192A86A7E90}") = "amd", "amd\amd.vfproj", "{C EndProject Project("{6989167D-11E4-40FE-8C1A-2192A86A7E90}") = "arpack", "arpack\arpack.vfproj", "{E75FB1D9-6EB8-4C3B-8130-7B1CBE5EA7C1}" EndProject -Project("{6989167D-11E4-40FE-8C1A-2192A86A7E90}") = "spmm", "spmm\spmm.vfproj", "{6540EB29-E749-431D-AB70-3A5AA4053834}" -EndProject Project("{6989167D-11E4-40FE-8C1A-2192A86A7E90}") = "feast", "feast\feast.vfproj", "{245BA1FE-5C90-492F-B308-65A6C0068A69}" EndProject Project("{6989167D-11E4-40FE-8C1A-2192A86A7E90}") = "mumps_f", "mumps_f\mumps_f.vfproj", "{C41240E3-D993-440D-B994-4C721A27BC51}" @@ -87,9 +84,6 @@ Global {E75FB1D9-6EB8-4C3B-8130-7B1CBE5EA7C1}.Debug|x64.ActiveCfg = Release|x64 {E75FB1D9-6EB8-4C3B-8130-7B1CBE5EA7C1}.Release|x64.ActiveCfg = Release|x64 {E75FB1D9-6EB8-4C3B-8130-7B1CBE5EA7C1}.Release|x64.Build.0 = Release|x64 - {6540EB29-E749-431D-AB70-3A5AA4053834}.Debug|x64.ActiveCfg = Release|x64 - {6540EB29-E749-431D-AB70-3A5AA4053834}.Release|x64.ActiveCfg = Release|x64 - {6540EB29-E749-431D-AB70-3A5AA4053834}.Release|x64.Build.0 = Release|x64 {245BA1FE-5C90-492F-B308-65A6C0068A69}.Debug|x64.ActiveCfg = Release|x64 {245BA1FE-5C90-492F-B308-65A6C0068A69}.Release|x64.ActiveCfg = Release|x64 {245BA1FE-5C90-492F-B308-65A6C0068A69}.Release|x64.Build.0 = Release|x64 @@ -133,7 +127,6 @@ Global GlobalSection(NestedProjects) = preSolution {C8479E1D-A9C4-4FE9-B283-3F6317F9ECF5} = {10DA8914-6427-42A5-B20C-96CE5DDE8301} {E75FB1D9-6EB8-4C3B-8130-7B1CBE5EA7C1} = {10DA8914-6427-42A5-B20C-96CE5DDE8301} - {6540EB29-E749-431D-AB70-3A5AA4053834} = {10DA8914-6427-42A5-B20C-96CE5DDE8301} {245BA1FE-5C90-492F-B308-65A6C0068A69} = {10DA8914-6427-42A5-B20C-96CE5DDE8301} {C41240E3-D993-440D-B994-4C721A27BC51} = {10DA8914-6427-42A5-B20C-96CE5DDE8301} {86C5CA2F-6514-413E-AC69-0DD78BBB9C33} = {04BD7C4D-B05C-4173-A53F-0DEFCDAF6CE3} diff --git a/MSVC/suanPan/suanPan/suanPan.vcxproj b/MSVC/suanPan/suanPan/suanPan.vcxproj index 687e4fc52..ff88d8193 100644 --- a/MSVC/suanPan/suanPan/suanPan.vcxproj +++ b/MSVC/suanPan/suanPan/suanPan.vcxproj @@ -20,7 +20,7 @@ Application - v142 + v143 Unicode x64 true @@ -28,7 +28,7 @@ Application false - v142 + v143 true Unicode x64 @@ -92,7 +92,7 @@ true true ../../../Libs/vs;../../$(Configuration)/Libs;$(VTK_LIB);$(CUDA_PATH)/lib/x64;%(AdditionalLibraryDirectories) - cusparse.lib;cusolver.lib;cublas.lib;cudart.lib;amd.lib;arpack.lib;metis.lib;mumps_c.lib;mumps_f.lib;feast.lib;spmm.lib;superlu.lib;libhdf5.lib;libhdf5_hl.lib;opengl32.lib;wsock32.lib;psapi.lib;dbghelp.lib;vtkcgns-9.2.lib;vtkChartsCore-9.2.lib;vtkCommonColor-9.2.lib;vtkCommonComputationalGeometry-9.2.lib;vtkCommonCore-9.2.lib;vtkCommonDataModel-9.2.lib;vtkCommonExecutionModel-9.2.lib;vtkCommonMath-9.2.lib;vtkCommonMisc-9.2.lib;vtkCommonSystem-9.2.lib;vtkCommonTransforms-9.2.lib;vtkDICOMParser-9.2.lib;vtkDomainsChemistry-9.2.lib;vtkDomainsChemistryOpenGL2-9.2.lib;vtkdoubleconversion-9.2.lib;vtkexodusII-9.2.lib;vtkexpat-9.2.lib;vtkFiltersAMR-9.2.lib;vtkFiltersCore-9.2.lib;vtkFiltersExtraction-9.2.lib;vtkFiltersFlowPaths-9.2.lib;vtkFiltersGeneral-9.2.lib;vtkFiltersGeneric-9.2.lib;vtkFiltersGeometry-9.2.lib;vtkFiltersHybrid-9.2.lib;vtkFiltersHyperTree-9.2.lib;vtkFiltersImaging-9.2.lib;vtkFiltersModeling-9.2.lib;vtkFiltersParallel-9.2.lib;vtkFiltersParallelImaging-9.2.lib;vtkFiltersPoints-9.2.lib;vtkFiltersProgrammable-9.2.lib;vtkFiltersSelection-9.2.lib;vtkFiltersSMP-9.2.lib;vtkFiltersSources-9.2.lib;vtkFiltersStatistics-9.2.lib;vtkFiltersTexture-9.2.lib;vtkFiltersTopology-9.2.lib;vtkFiltersVerdict-9.2.lib;vtkfmt-9.2.lib;vtkfreetype-9.2.lib;vtkGeovisCore-9.2.lib;vtkgl2ps-9.2.lib;vtkglew-9.2.lib;vtkhdf5_hl-9.2.lib;vtkhdf5-9.2.lib;vtkImagingColor-9.2.lib;vtkImagingCore-9.2.lib;vtkImagingFourier-9.2.lib;vtkImagingGeneral-9.2.lib;vtkImagingHybrid-9.2.lib;vtkImagingMath-9.2.lib;vtkImagingMorphological-9.2.lib;vtkImagingSources-9.2.lib;vtkImagingStatistics-9.2.lib;vtkImagingStencil-9.2.lib;vtkInfovisCore-9.2.lib;vtkInfovisLayout-9.2.lib;vtkInteractionImage-9.2.lib;vtkInteractionStyle-9.2.lib;vtkInteractionWidgets-9.2.lib;vtkIOAMR-9.2.lib;vtkIOAsynchronous-9.2.lib;vtkIOCesium3DTiles-9.2.lib;vtkIOCGNSReader-9.2.lib;vtkIOChemistry-9.2.lib;vtkIOCityGML-9.2.lib;vtkIOCONVERGECFD-9.2.lib;vtkIOCore-9.2.lib;vtkIOEnSight-9.2.lib;vtkIOExodus-9.2.lib;vtkIOExport-9.2.lib;vtkIOExportGL2PS-9.2.lib;vtkIOExportPDF-9.2.lib;vtkIOGeometry-9.2.lib;vtkIOHDF-9.2.lib;vtkIOImage-9.2.lib;vtkIOImport-9.2.lib;vtkIOInfovis-9.2.lib;vtkIOIOSS-9.2.lib;vtkIOLegacy-9.2.lib;vtkIOLSDyna-9.2.lib;vtkIOMINC-9.2.lib;vtkIOMotionFX-9.2.lib;vtkIOMovie-9.2.lib;vtkIONetCDF-9.2.lib;vtkIOOggTheora-9.2.lib;vtkIOParallel-9.2.lib;vtkIOParallelXML-9.2.lib;vtkIOPLY-9.2.lib;vtkIOSegY-9.2.lib;vtkIOSQL-9.2.lib;vtkioss-9.2.lib;vtkIOTecplotTable-9.2.lib;vtkIOVeraOut-9.2.lib;vtkIOVideo-9.2.lib;vtkIOXML-9.2.lib;vtkIOXMLParser-9.2.lib;vtkjpeg-9.2.lib;vtkjsoncpp-9.2.lib;vtkkissfft-9.2.lib;vtklibharu-9.2.lib;vtklibproj-9.2.lib;vtklibxml2-9.2.lib;vtkloguru-9.2.lib;vtklz4-9.2.lib;vtklzma-9.2.lib;vtkmetaio-9.2.lib;vtknetcdf-9.2.lib;vtkogg-9.2.lib;vtkParallelCore-9.2.lib;vtkParallelDIY-9.2.lib;vtkpng-9.2.lib;vtkpugixml-9.2.lib;vtkRenderingAnnotation-9.2.lib;vtkRenderingContext2D-9.2.lib;vtkRenderingContextOpenGL2-9.2.lib;vtkRenderingCore-9.2.lib;vtkRenderingFreeType-9.2.lib;vtkRenderingGL2PSOpenGL2-9.2.lib;vtkRenderingHyperTreeGrid-9.2.lib;vtkRenderingImage-9.2.lib;vtkRenderingLabel-9.2.lib;vtkRenderingLICOpenGL2-9.2.lib;vtkRenderingLOD-9.2.lib;vtkRenderingOpenGL2-9.2.lib;vtkRenderingSceneGraph-9.2.lib;vtkRenderingUI-9.2.lib;vtkRenderingVolume-9.2.lib;vtkRenderingVolumeOpenGL2-9.2.lib;vtkRenderingVtkJS-9.2.lib;vtksqlite-9.2.lib;vtksys-9.2.lib;vtkTestingRendering-9.2.lib;vtktheora-9.2.lib;vtktiff-9.2.lib;vtkverdict-9.2.lib;vtkViewsContext2D-9.2.lib;vtkViewsCore-9.2.lib;vtkViewsInfovis-9.2.lib;vtkWrappingTools-9.2.lib;vtkzlib-9.2.lib;%(AdditionalDependencies) + cusparse.lib;cusolver.lib;cublas.lib;cudart.lib;amd.lib;arpack.lib;metis.lib;mumps_c.lib;mumps_f.lib;feast.lib;superlu.lib;libhdf5.lib;libhdf5_hl.lib;opengl32.lib;wsock32.lib;psapi.lib;dbghelp.lib;vtkcgns-9.2.lib;vtkChartsCore-9.2.lib;vtkCommonColor-9.2.lib;vtkCommonComputationalGeometry-9.2.lib;vtkCommonCore-9.2.lib;vtkCommonDataModel-9.2.lib;vtkCommonExecutionModel-9.2.lib;vtkCommonMath-9.2.lib;vtkCommonMisc-9.2.lib;vtkCommonSystem-9.2.lib;vtkCommonTransforms-9.2.lib;vtkDICOMParser-9.2.lib;vtkDomainsChemistry-9.2.lib;vtkDomainsChemistryOpenGL2-9.2.lib;vtkdoubleconversion-9.2.lib;vtkexodusII-9.2.lib;vtkexpat-9.2.lib;vtkFiltersAMR-9.2.lib;vtkFiltersCore-9.2.lib;vtkFiltersExtraction-9.2.lib;vtkFiltersFlowPaths-9.2.lib;vtkFiltersGeneral-9.2.lib;vtkFiltersGeneric-9.2.lib;vtkFiltersGeometry-9.2.lib;vtkFiltersHybrid-9.2.lib;vtkFiltersHyperTree-9.2.lib;vtkFiltersImaging-9.2.lib;vtkFiltersModeling-9.2.lib;vtkFiltersParallel-9.2.lib;vtkFiltersParallelImaging-9.2.lib;vtkFiltersPoints-9.2.lib;vtkFiltersProgrammable-9.2.lib;vtkFiltersSelection-9.2.lib;vtkFiltersSMP-9.2.lib;vtkFiltersSources-9.2.lib;vtkFiltersStatistics-9.2.lib;vtkFiltersTexture-9.2.lib;vtkFiltersTopology-9.2.lib;vtkFiltersVerdict-9.2.lib;vtkfmt-9.2.lib;vtkfreetype-9.2.lib;vtkGeovisCore-9.2.lib;vtkgl2ps-9.2.lib;vtkglew-9.2.lib;vtkhdf5_hl-9.2.lib;vtkhdf5-9.2.lib;vtkImagingColor-9.2.lib;vtkImagingCore-9.2.lib;vtkImagingFourier-9.2.lib;vtkImagingGeneral-9.2.lib;vtkImagingHybrid-9.2.lib;vtkImagingMath-9.2.lib;vtkImagingMorphological-9.2.lib;vtkImagingSources-9.2.lib;vtkImagingStatistics-9.2.lib;vtkImagingStencil-9.2.lib;vtkInfovisCore-9.2.lib;vtkInfovisLayout-9.2.lib;vtkInteractionImage-9.2.lib;vtkInteractionStyle-9.2.lib;vtkInteractionWidgets-9.2.lib;vtkIOAMR-9.2.lib;vtkIOAsynchronous-9.2.lib;vtkIOCesium3DTiles-9.2.lib;vtkIOCGNSReader-9.2.lib;vtkIOChemistry-9.2.lib;vtkIOCityGML-9.2.lib;vtkIOCONVERGECFD-9.2.lib;vtkIOCore-9.2.lib;vtkIOEnSight-9.2.lib;vtkIOExodus-9.2.lib;vtkIOExport-9.2.lib;vtkIOExportGL2PS-9.2.lib;vtkIOExportPDF-9.2.lib;vtkIOGeometry-9.2.lib;vtkIOHDF-9.2.lib;vtkIOImage-9.2.lib;vtkIOImport-9.2.lib;vtkIOInfovis-9.2.lib;vtkIOIOSS-9.2.lib;vtkIOLegacy-9.2.lib;vtkIOLSDyna-9.2.lib;vtkIOMINC-9.2.lib;vtkIOMotionFX-9.2.lib;vtkIOMovie-9.2.lib;vtkIONetCDF-9.2.lib;vtkIOOggTheora-9.2.lib;vtkIOParallel-9.2.lib;vtkIOParallelXML-9.2.lib;vtkIOPLY-9.2.lib;vtkIOSegY-9.2.lib;vtkIOSQL-9.2.lib;vtkioss-9.2.lib;vtkIOTecplotTable-9.2.lib;vtkIOVeraOut-9.2.lib;vtkIOVideo-9.2.lib;vtkIOXML-9.2.lib;vtkIOXMLParser-9.2.lib;vtkjpeg-9.2.lib;vtkjsoncpp-9.2.lib;vtkkissfft-9.2.lib;vtklibharu-9.2.lib;vtklibproj-9.2.lib;vtklibxml2-9.2.lib;vtkloguru-9.2.lib;vtklz4-9.2.lib;vtklzma-9.2.lib;vtkmetaio-9.2.lib;vtknetcdf-9.2.lib;vtkogg-9.2.lib;vtkParallelCore-9.2.lib;vtkParallelDIY-9.2.lib;vtkpng-9.2.lib;vtkpugixml-9.2.lib;vtkRenderingAnnotation-9.2.lib;vtkRenderingContext2D-9.2.lib;vtkRenderingContextOpenGL2-9.2.lib;vtkRenderingCore-9.2.lib;vtkRenderingFreeType-9.2.lib;vtkRenderingGL2PSOpenGL2-9.2.lib;vtkRenderingHyperTreeGrid-9.2.lib;vtkRenderingImage-9.2.lib;vtkRenderingLabel-9.2.lib;vtkRenderingLICOpenGL2-9.2.lib;vtkRenderingLOD-9.2.lib;vtkRenderingOpenGL2-9.2.lib;vtkRenderingSceneGraph-9.2.lib;vtkRenderingUI-9.2.lib;vtkRenderingVolume-9.2.lib;vtkRenderingVolumeOpenGL2-9.2.lib;vtkRenderingVtkJS-9.2.lib;vtksqlite-9.2.lib;vtksys-9.2.lib;vtkTestingRendering-9.2.lib;vtktheora-9.2.lib;vtktiff-9.2.lib;vtkverdict-9.2.lib;vtkViewsContext2D-9.2.lib;vtkViewsCore-9.2.lib;vtkViewsInfovis-9.2.lib;vtkWrappingTools-9.2.lib;vtkzlib-9.2.lib;%(AdditionalDependencies) @@ -128,6 +128,7 @@ + @@ -137,6 +138,7 @@ + @@ -293,6 +295,8 @@ + + @@ -450,15 +454,19 @@ + + + + @@ -487,6 +495,7 @@ + @@ -535,6 +544,7 @@ + @@ -544,6 +554,7 @@ + @@ -729,6 +740,8 @@ + + @@ -887,15 +900,19 @@ + + + + diff --git a/MSVC/suanPan/suanPan/suanPan.vcxproj.filters b/MSVC/suanPan/suanPan/suanPan.vcxproj.filters index f8ab5399f..94f30a660 100644 --- a/MSVC/suanPan/suanPan/suanPan.vcxproj.filters +++ b/MSVC/suanPan/suanPan/suanPan.vcxproj.filters @@ -1417,6 +1417,33 @@ Element\Modifier + + Solver\Integrator + + + Converger + + + Converger + + + Solver\Integrator + + + Solver\Integrator + + + Solver\Integrator + + + UnitTest + + + Material\Material1D\Elastic + + + Material\Material1D\Elastic + @@ -2704,6 +2731,30 @@ Toolbox + + Solver\Integrator + + + Converger + + + Converger + + + Solver\Integrator + + + Solver\Integrator + + + Solver\Integrator + + + Material\Material1D\Elastic + + + Material\Material1D\Elastic + diff --git a/MSVC/suanPan/superlu/superlu.vcxproj b/MSVC/suanPan/superlu/superlu.vcxproj index 4db1771f3..91a6b6b3c 100644 --- a/MSVC/suanPan/superlu/superlu.vcxproj +++ b/MSVC/suanPan/superlu/superlu.vcxproj @@ -21,7 +21,7 @@ StaticLibrary false - v142 + v143 true Unicode x64 @@ -29,7 +29,7 @@ StaticLibrary true - v142 + v143 Unicode x64 diff --git a/MSVC/suanPan/superlumt/superlumt.vcxproj b/MSVC/suanPan/superlumt/superlumt.vcxproj index 28e3342b3..31c566acd 100644 --- a/MSVC/suanPan/superlumt/superlumt.vcxproj +++ b/MSVC/suanPan/superlumt/superlumt.vcxproj @@ -21,7 +21,7 @@ StaticLibrary false - v142 + v143 true Unicode x64 @@ -29,7 +29,7 @@ StaticLibrary true - v142 + v143 Unicode x64 diff --git a/Material/Material1D/CMakeLists.txt b/Material/Material1D/CMakeLists.txt index 89fc835bd..3166a57cb 100644 --- a/Material/Material1D/CMakeLists.txt +++ b/Material/Material1D/CMakeLists.txt @@ -10,6 +10,8 @@ set(M1D Material1D/Elastic/Elastic1D.cpp Material1D/Elastic/MultilinearElastic1D.cpp Material1D/Elastic/PolyElastic1D.cpp + Material1D/Elastic/Sinh1D.cpp + Material1D/Elastic/Tanh1D.cpp Material1D/Hysteresis/AFC.cpp Material1D/Hysteresis/BilinearOO.cpp Material1D/Hysteresis/BilinearPO.cpp diff --git a/Material/Material1D/Elastic/Sinh1D.cpp b/Material/Material1D/Elastic/Sinh1D.cpp new file mode 100644 index 000000000..56308203e --- /dev/null +++ b/Material/Material1D/Elastic/Sinh1D.cpp @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "Sinh1D.h" + +Sinh1D::Sinh1D(const unsigned T, const double E, const double R) + : DataSinh1D{E} + , Material1D(T, R) {} + +int Sinh1D::initialize(const shared_ptr&) { + trial_stiffness = current_stiffness = initial_stiffness = elastic_modulus; + + return SUANPAN_SUCCESS; +} + +unique_ptr Sinh1D::get_copy() { return make_unique(*this); } + +int Sinh1D::update_trial_status(const vec& t_strain) { + trial_stress = elastic_modulus * arma::sinh(trial_strain = t_strain); + trial_stiffness = elastic_modulus * arma::cosh(trial_strain); + return SUANPAN_SUCCESS; +} + +int Sinh1D::clear_status() { + current_strain = trial_strain.zeros(); + current_stress = trial_stress.zeros(); + current_stiffness = trial_stiffness = elastic_modulus; + return 0; +} + +int Sinh1D::commit_status() { + current_strain = trial_strain; + current_stress = trial_stress; + current_stiffness = trial_stiffness; + return 0; +} + +int Sinh1D::reset_status() { + trial_strain = current_strain; + trial_stress = current_stress; + trial_stiffness = current_stiffness; + return 0; +} + +void Sinh1D::print() { + suanpan_info("A 1D nonlinear elastic material using sinh function with an elastic modulus of %.4E.\n", elastic_modulus); + Material1D::print(); +} diff --git a/Material/Material1D/Elastic/Sinh1D.h b/Material/Material1D/Elastic/Sinh1D.h new file mode 100644 index 000000000..7b3478374 --- /dev/null +++ b/Material/Material1D/Elastic/Sinh1D.h @@ -0,0 +1,58 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class Sinh1D + * @brief A 1D Elastic class. + * @author tlc + * @date 12/12/2022 + * @file Sinh1D.h + * @addtogroup Material-1D + * @{ + */ + +#ifndef SINH1D_H +#define SINH1D_H + +#include + +struct DataSinh1D { + const double elastic_modulus; // elastic modulus +}; + +class Sinh1D final : DataSinh1D, public Material1D { +public: + Sinh1D(unsigned, // tag + double, // elastic modulus + double = 0. // density + ); + + int initialize(const shared_ptr&) override; + + unique_ptr get_copy() override; + + int update_trial_status(const vec&) override; + + int clear_status() override; + int commit_status() override; + int reset_status() override; + + void print() override; +}; + +#endif + +//! @} diff --git a/Material/Material1D/Elastic/Tanh1D.cpp b/Material/Material1D/Elastic/Tanh1D.cpp new file mode 100644 index 000000000..33c8f1175 --- /dev/null +++ b/Material/Material1D/Elastic/Tanh1D.cpp @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "Tanh1D.h" + +Tanh1D::Tanh1D(const unsigned T, const double E, const double R) + : DataTanh1D{E} + , Material1D(T, R) {} + +int Tanh1D::initialize(const shared_ptr&) { + trial_stiffness = current_stiffness = initial_stiffness = elastic_modulus; + + return SUANPAN_SUCCESS; +} + +unique_ptr Tanh1D::get_copy() { return make_unique(*this); } + +int Tanh1D::update_trial_status(const vec& t_strain) { + trial_stress = elastic_modulus * arma::tanh(trial_strain = t_strain); + trial_stiffness = elastic_modulus * arma::pow(arma::cosh(trial_strain), -2.); + return SUANPAN_SUCCESS; +} + +int Tanh1D::clear_status() { + current_strain = trial_strain.zeros(); + current_stress = trial_stress.zeros(); + current_stiffness = trial_stiffness = elastic_modulus; + return 0; +} + +int Tanh1D::commit_status() { + current_strain = trial_strain; + current_stress = trial_stress; + current_stiffness = trial_stiffness; + return 0; +} + +int Tanh1D::reset_status() { + trial_strain = current_strain; + trial_stress = current_stress; + trial_stiffness = current_stiffness; + return 0; +} + +void Tanh1D::print() { + suanpan_info("A 1D nonlinear elastic material using tanh function with an elastic modulus of %.4E.\n", elastic_modulus); + Material1D::print(); +} diff --git a/Material/Material1D/Elastic/Tanh1D.h b/Material/Material1D/Elastic/Tanh1D.h new file mode 100644 index 000000000..8d1f7d893 --- /dev/null +++ b/Material/Material1D/Elastic/Tanh1D.h @@ -0,0 +1,58 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class Tanh1D + * @brief A 1D Elastic class. + * @author tlc + * @date 12/12/2022 + * @file Tanh1D.h + * @addtogroup Material-1D + * @{ + */ + +#ifndef Tanh1D_H +#define Tanh1D_H + +#include + +struct DataTanh1D { + const double elastic_modulus; // elastic modulus +}; + +class Tanh1D final : DataTanh1D, public Material1D { +public: + Tanh1D(unsigned, // tag + double, // elastic modulus + double = 0. // density + ); + + int initialize(const shared_ptr&) override; + + unique_ptr get_copy() override; + + int update_trial_status(const vec&) override; + + int clear_status() override; + int commit_status() override; + int reset_status() override; + + void print() override; +}; + +#endif + +//! @} diff --git a/Material/Material1D/Material1D b/Material/Material1D/Material1D index a0e49f9f5..c84a0be83 100644 --- a/Material/Material1D/Material1D +++ b/Material/Material1D/Material1D @@ -9,11 +9,13 @@ #include "Elastic/Elastic1D.h" #include "Elastic/MultilinearElastic1D.h" #include "Elastic/PolyElastic1D.h" +#include "Elastic/Sinh1D.h" +#include "Elastic/Tanh1D.h" #include "Hysteresis/AFC.h" -#include "Hysteresis/BWBN.h" #include "Hysteresis/BilinearOO.h" #include "Hysteresis/BilinearPO.h" #include "Hysteresis/BoucWen.h" +#include "Hysteresis/BWBN.h" #include "Hysteresis/CoulombFriction.h" #include "Hysteresis/Flag.h" #include "Hysteresis/Gap01.h" @@ -34,9 +36,6 @@ #include "Viscosity/NonlinearViscosity.h" #include "Viscosity/Viscosity01.h" #include "Viscosity/Viscosity02.h" -#include "Wrapper/Parallel.h" -#include "Wrapper/Sequential.h" -#include "Wrapper/Uniaxial.h" #include "vonMises/ArmstrongFrederick1D.h" #include "vonMises/Bilinear1D.h" #include "vonMises/BilinearMises1D.h" @@ -45,4 +44,7 @@ #include "vonMises/MultilinearMises1D.h" #include "vonMises/NonlinearGurson1D.h" #include "vonMises/NonlinearMises1D.h" -#include "vonMises/VAFCRP1D.h" \ No newline at end of file +#include "vonMises/VAFCRP1D.h" +#include "Wrapper/Parallel.h" +#include "Wrapper/Sequential.h" +#include "Wrapper/Uniaxial.h" \ No newline at end of file diff --git a/Material/Material1D/Viscosity/Maxwell.cpp b/Material/Material1D/Viscosity/Maxwell.cpp index d94541b7a..db4d013fb 100644 --- a/Material/Material1D/Viscosity/Maxwell.cpp +++ b/Material/Material1D/Viscosity/Maxwell.cpp @@ -17,7 +17,7 @@ #include "Maxwell.h" #include -#include +#include #include Maxwell::Maxwell(const unsigned T, const unsigned DT, const unsigned ST, const bool UM, const unsigned PC, const double BT) @@ -45,7 +45,7 @@ int Maxwell::initialize(const shared_ptr& D) { if(nullptr == damper || nullptr == spring) return SUANPAN_FAIL; - incre_time = &D->get_factory()->get_incre_time(); + incre_time = &get_incre_time(D->get_factory()); trial_strain_rate = current_strain_rate = incre_strain_rate.zeros(1); diff --git a/Material/Material1D/vonMises/ArmstrongFrederick1D.cpp b/Material/Material1D/vonMises/ArmstrongFrederick1D.cpp index 528d12659..4ce958534 100644 --- a/Material/Material1D/vonMises/ArmstrongFrederick1D.cpp +++ b/Material/Material1D/vonMises/ArmstrongFrederick1D.cpp @@ -131,12 +131,9 @@ int ArmstrongFrederick1D::reset_status() { } vector ArmstrongFrederick1D::record(const OutputType P) { - vector data; + if(P == OutputType::PEEQ) return {vec{current_history(size)}}; - if(P == OutputType::PEEQ) data.emplace_back(vec{current_history(size)}); - else return Material1D::record(P); - - return data; + return Material1D::record(P); } void ArmstrongFrederick1D::print() { diff --git a/Material/Material1D/vonMises/NonlinearGurson1D.cpp b/Material/Material1D/vonMises/NonlinearGurson1D.cpp index 0ebf315b0..7bb0f1710 100644 --- a/Material/Material1D/vonMises/NonlinearGurson1D.cpp +++ b/Material/Material1D/vonMises/NonlinearGurson1D.cpp @@ -157,21 +157,9 @@ int NonlinearGurson1D::reset_status() { } vector NonlinearGurson1D::record(const OutputType P) { - if(P == OutputType::PEEQ) { - vector data; - data.emplace_back(vec{current_history(0)}); - return data; - } - if(P == OutputType::VF) { - vector data; - data.emplace_back(vec{current_history(1)}); - return data; - } - if(P == OutputType::PE) { - vector data; - data.emplace_back(current_strain - current_stress / elastic_modulus); - return data; - } + if(P == OutputType::PEEQ) return {vec{current_history(0)}}; + if(P == OutputType::VF) return {vec{current_history(1)}}; + if(P == OutputType::PE) return {vec{current_strain - current_stress / elastic_modulus}}; return Material1D::record(P); } diff --git a/Material/Material1D/vonMises/VAFCRP1D.cpp b/Material/Material1D/vonMises/VAFCRP1D.cpp index 451970832..baa44c52e 100644 --- a/Material/Material1D/vonMises/VAFCRP1D.cpp +++ b/Material/Material1D/vonMises/VAFCRP1D.cpp @@ -17,7 +17,7 @@ #include "VAFCRP1D.h" #include -#include +#include #include constexpr double VAFCRP1D::unit_time = 1.; @@ -27,7 +27,7 @@ VAFCRP1D::VAFCRP1D(const unsigned T, const double E, const double Y, const doubl , Material1D(T, R) { access::rw(tolerance) = 1E-15; } int VAFCRP1D::initialize(const shared_ptr& D) { - incre_time = D == nullptr ? &unit_time : &D->get_factory()->get_incre_time(); + incre_time = D == nullptr ? &unit_time : &get_incre_time(D->get_factory()); trial_stiffness = current_stiffness = initial_stiffness = elastic_modulus; @@ -133,12 +133,9 @@ int VAFCRP1D::reset_status() { } vector VAFCRP1D::record(const OutputType P) { - vector data; + if(P == OutputType::PEEQ) return {vec{current_history(size)}}; - if(P == OutputType::PEEQ) data.emplace_back(vec{current_history(size)}); - else return Material1D::record(P); - - return data; + return Material1D::record(P); } void VAFCRP1D::print() { diff --git a/Material/Material2D/Bilinear2D.cpp b/Material/Material2D/Bilinear2D.cpp index 0ed053ae5..302c43739 100644 --- a/Material/Material2D/Bilinear2D.cpp +++ b/Material/Material2D/Bilinear2D.cpp @@ -122,12 +122,9 @@ void Bilinear2D::print() { } vector Bilinear2D::record(const OutputType P) { - vector output; - output.reserve(1); - - if(P == OutputType::PE) output.emplace_back(current_strain - solve(initial_stiffness, current_stress)); - else if(P == OutputType::PEP) output.emplace_back(transform::strain::principal(current_strain - solve(initial_stiffness, current_stress))); - else if(P == OutputType::MISES) { + if(P == OutputType::PE) return {vec{current_strain - solve(initial_stiffness, current_stress)}}; + if(P == OutputType::PEP) return {transform::strain::principal(current_strain - solve(initial_stiffness, current_stress))}; + if(P == OutputType::MISES) { vec trial_mises(1); if(plane_type == PlaneType::S) trial_mises(0) = sqrt(current_stress(0) * current_stress(0) - current_stress(0) * current_stress(1) + current_stress(1) * current_stress(1) + 3. * current_stress(2) * current_stress(2)); else if(plane_type == PlaneType::E) { @@ -138,11 +135,11 @@ vector Bilinear2D::record(const OutputType P) { const auto tmp_c = sigma_33 - sigma_mean; trial_mises(0) = sqrt(1.5 * (tmp_a * tmp_a + tmp_b * tmp_b + tmp_c * tmp_c + 2. * current_stress(2) * current_stress(2))); } - output.emplace_back(trial_mises); + + return {trial_mises}; } - else if(P == OutputType::EEEQ) output.emplace_back(vec{sqrt(2. / 3.) * tensor::strain::norm(current_full_strain)}); - else if(P == OutputType::PEEQ) return base.record(P); - else return Material2D::record(P); + if(P == OutputType::EEEQ) return {vec{sqrt(2. / 3.) * tensor::strain::norm(current_full_strain)}}; + if(P == OutputType::PEEQ) return base.record(P); - return output; + return Material2D::record(P); } diff --git a/Material/Material2D/Elastic/Elastic2D.cpp b/Material/Material2D/Elastic/Elastic2D.cpp index 8ec727169..d9cc36ea8 100644 --- a/Material/Material2D/Elastic/Elastic2D.cpp +++ b/Material/Material2D/Elastic/Elastic2D.cpp @@ -95,10 +95,9 @@ void Elastic2D::print() { } vector Elastic2D::record(const OutputType P) { - vector output; - output.reserve(1); + const auto sigma_33 = elastic_modulus * poissons_ratio / (1. + poissons_ratio) / (1. - 2. * poissons_ratio) * (trial_strain(0) + trial_strain(1)); - if(const auto sigma_33 = elastic_modulus * poissons_ratio / (1. + poissons_ratio) / (1. - 2. * poissons_ratio) * (trial_strain(0) + trial_strain(1)); P == OutputType::MISES) { + if(P == OutputType::MISES) { vec trial_mises(1); if(plane_type == PlaneType::S) trial_mises(0) = sqrt(current_stress(0) * current_stress(0) - current_stress(0) * current_stress(1) + current_stress(1) * current_stress(1) + 3. * current_stress(2) * current_stress(2)); else if(plane_type == PlaneType::E) { @@ -108,9 +107,10 @@ vector Elastic2D::record(const OutputType P) { const auto tmp_c = sigma_33 - sigma_mean; trial_mises(0) = sqrt(1.5 * (tmp_a * tmp_a + tmp_b * tmp_b + tmp_c * tmp_c + 2. * current_stress(2) * current_stress(2))); } - output.emplace_back(trial_mises); + + return {trial_mises}; } - else if(P == OutputType::S) { + if(P == OutputType::S) { vec trail_sigma(4); trail_sigma(0) = trial_stress(0); @@ -118,10 +118,9 @@ vector Elastic2D::record(const OutputType P) { trail_sigma(3) = trial_stress(2); trail_sigma(2) = plane_type == PlaneType::S ? 0. : sigma_33; - output.emplace_back(trail_sigma); + return {trail_sigma}; } - else if(P == OutputType::SP) output.emplace_back(transform::stress::principal(trial_stress)); - else return Material2D::record(P); + if(P == OutputType::SP) return {transform::stress::principal(trial_stress)}; - return output; + return Material2D::record(P); } diff --git a/Material/Material3D/Concrete/CDPM2.cpp b/Material/Material3D/Concrete/CDPM2.cpp index 5ca819531..1c3616c9d 100644 --- a/Material/Material3D/Concrete/CDPM2.cpp +++ b/Material/Material3D/Concrete/CDPM2.cpp @@ -557,14 +557,11 @@ int CDPM2::reset_status() { } vector CDPM2::record(const OutputType T) { - vector data; + if(T == OutputType::KAPPAP) return {vec{current_history(0)}}; + if(T == OutputType::DT) return {vec{current_history(16)}}; + if(T == OutputType::DC) return {vec{current_history(17)}}; - if(T == OutputType::KAPPAP) data.emplace_back(vec{current_history(0)}); - else if(T == OutputType::DT) data.emplace_back(vec{current_history(16)}); - else if(T == OutputType::DC) data.emplace_back(vec{current_history(17)}); - else return Material3D::record(T); - - return data; + return Material3D::record(T); } void CDPM2::print() { suanpan_info("A concrete damage plasticity model based on the CDPM2 model. doi: 10.1016/j.ijsolstr.2013.07.008\n"); } diff --git a/Material/Material3D/Concrete/NonlinearCDP.cpp b/Material/Material3D/Concrete/NonlinearCDP.cpp index 42a4a3ba6..161408ded 100644 --- a/Material/Material3D/Concrete/NonlinearCDP.cpp +++ b/Material/Material3D/Concrete/NonlinearCDP.cpp @@ -248,15 +248,12 @@ int NonlinearCDP::reset_status() { } vector NonlinearCDP::record(const OutputType T) { - vector data; + if(T == OutputType::DT) return {vec{current_history(0)}}; + if(T == OutputType::DC) return {vec{current_history(1)}}; + if(T == OutputType::KAPPAT) return {vec{current_history(2)}}; + if(T == OutputType::KAPPAC) return {vec{current_history(3)}}; - if(T == OutputType::DT) data.emplace_back(vec{current_history(0)}); - else if(T == OutputType::DC) data.emplace_back(vec{current_history(1)}); - else if(T == OutputType::KAPPAT) data.emplace_back(vec{current_history(2)}); - else if(T == OutputType::KAPPAC) data.emplace_back(vec{current_history(3)}); - else return Material3D::record(T); - - return data; + return Material3D::record(T); } void NonlinearCDP::print() { suanpan_info("A concrete damage plasticity model.\n"); } diff --git a/Material/Material3D/vonMises/ArmstrongFrederick.cpp b/Material/Material3D/vonMises/ArmstrongFrederick.cpp index 0c674d950..18eb4203a 100644 --- a/Material/Material3D/vonMises/ArmstrongFrederick.cpp +++ b/Material/Material3D/vonMises/ArmstrongFrederick.cpp @@ -147,12 +147,9 @@ int ArmstrongFrederick::reset_status() { } vector ArmstrongFrederick::record(const OutputType P) { - vector data; + if(P == OutputType::PEEQ) return {vec{current_history(0)}}; - if(P == OutputType::PEEQ) data.emplace_back(vec{current_history(0)}); - else return Material3D::record(P); - - return data; + return Material3D::record(P); } void ArmstrongFrederick::print() { suanpan_info("A 3D nonlinear hardening model using Armstrong--Frederick kinematic hardening rule.\n"); } diff --git a/Material/Material3D/vonMises/BilinearJ2.cpp b/Material/Material3D/vonMises/BilinearJ2.cpp index 1b1ab1ab8..f918a4d98 100644 --- a/Material/Material3D/vonMises/BilinearJ2.cpp +++ b/Material/Material3D/vonMises/BilinearJ2.cpp @@ -105,14 +105,11 @@ int BilinearJ2::reset_status() { } vector BilinearJ2::record(const OutputType P) { - vector data; + if(P == OutputType::MISES) return {vec{tensor::stress::norm(tensor::dev(current_stress)) / root_two_third}}; + if(P == OutputType::EEQ) return {vec{root_two_third * tensor::strain::norm(tensor::dev(current_strain))}}; + if(P == OutputType::PEEQ) return {vec{current_history(0)}}; - if(P == OutputType::MISES) data.emplace_back(vec{tensor::stress::norm(tensor::dev(current_stress)) / root_two_third}); - else if(P == OutputType::EEQ) data.emplace_back(vec{root_two_third * tensor::strain::norm(tensor::dev(current_strain))}); - else if(P == OutputType::PEEQ) data.emplace_back(vec{current_history(0)}); - else return Material3D::record(P); - - return data; + return Material3D::record(P); } void BilinearJ2::print() { suanpan_info("A 3D bilinear hardening model.\n"); } diff --git a/Material/Material3D/vonMises/NonlinearGurson.cpp b/Material/Material3D/vonMises/NonlinearGurson.cpp index 9172157fc..1a408150a 100644 --- a/Material/Material3D/vonMises/NonlinearGurson.cpp +++ b/Material/Material3D/vonMises/NonlinearGurson.cpp @@ -166,21 +166,9 @@ int NonlinearGurson::reset_status() { } vector NonlinearGurson::record(const OutputType P) { - if(P == OutputType::PEEQ) { - vector data; - data.emplace_back(vec{current_history(0)}); - return data; - } - if(P == OutputType::VF) { - vector data; - data.emplace_back(vec{current_history(1)}); - return data; - } - if(P == OutputType::PE) { - vector data; - data.emplace_back(current_strain - solve(initial_stiffness, current_stress)); - return data; - } + if(P == OutputType::PEEQ) return {vec{current_history(0)}}; + if(P == OutputType::VF) return {vec{current_history(1)}}; + if(P == OutputType::PE) return {vec{current_strain - solve(initial_stiffness, current_stress)}}; return Material3D::record(P); } diff --git a/Material/Material3D/vonMises/NonlinearJ2.cpp b/Material/Material3D/vonMises/NonlinearJ2.cpp index 5aeb6daa9..9441e391f 100644 --- a/Material/Material3D/vonMises/NonlinearJ2.cpp +++ b/Material/Material3D/vonMises/NonlinearJ2.cpp @@ -126,11 +126,7 @@ int NonlinearJ2::reset_status() { } vector NonlinearJ2::record(const OutputType P) { - if(P == OutputType::PEEQ) { - vector data; - data.emplace_back(vec{current_history(0)}); - return data; - } + if(P == OutputType::PEEQ) return {vec{current_history(0)}}; return Material3D::record(P); } diff --git a/Material/Material3D/vonMises/NonlinearPeric.cpp b/Material/Material3D/vonMises/NonlinearPeric.cpp index 68d842dbd..191fbf242 100644 --- a/Material/Material3D/vonMises/NonlinearPeric.cpp +++ b/Material/Material3D/vonMises/NonlinearPeric.cpp @@ -17,7 +17,7 @@ #include "NonlinearPeric.h" #include -#include +#include #include const double NonlinearPeric::root_three_two = sqrt(1.5); @@ -29,7 +29,7 @@ NonlinearPeric::NonlinearPeric(const unsigned T, const double E, const double V, , Material3D(T, R) {} int NonlinearPeric::initialize(const shared_ptr& D) { - incre_time = D == nullptr ? &unit_time : &D->get_factory()->get_incre_time(); + incre_time = D == nullptr ? &unit_time : &get_incre_time(D->get_factory()); trial_stiffness = current_stiffness = initial_stiffness = tensor::isotropic_stiffness(elastic_modulus, poissons_ratio); diff --git a/Material/Material3D/vonMises/VAFCRP.cpp b/Material/Material3D/vonMises/VAFCRP.cpp index 4720b72fc..fa7e279bf 100644 --- a/Material/Material3D/vonMises/VAFCRP.cpp +++ b/Material/Material3D/vonMises/VAFCRP.cpp @@ -17,7 +17,7 @@ #include "VAFCRP.h" #include -#include +#include #include #include @@ -29,7 +29,7 @@ VAFCRP::VAFCRP(const unsigned T, const double E, const double V, const double Y, , Material3D(T, R) { access::rw(tolerance) = 1E-15; } int VAFCRP::initialize(const shared_ptr& D) { - incre_time = &D->get_factory()->get_incre_time(); + incre_time = &get_incre_time(D->get_factory()); trial_stiffness = current_stiffness = initial_stiffness = tensor::isotropic_stiffness(elastic_modulus, poissons_ratio); @@ -148,12 +148,9 @@ int VAFCRP::reset_status() { } vector VAFCRP::record(const OutputType P) { - vector data; + if(P == OutputType::PEEQ) return {vec{current_history(0)}}; - if(P == OutputType::PEEQ) data.emplace_back(vec{current_history(0)}); - else return Material3D::record(P); - - return data; + return Material3D::record(P); } void VAFCRP::print() { suanpan_info("The VADCRP model.\n"); } diff --git a/Material/MaterialParser.cpp b/Material/MaterialParser.cpp index f724ba349..29da06bb1 100644 --- a/Material/MaterialParser.cpp +++ b/Material/MaterialParser.cpp @@ -1133,6 +1133,52 @@ void new_dhakal(unique_ptr& return_obj, istringstream& command) { return_obj = make_unique(tag, mat_tag, y_strain, parameter); } +void new_sinh1d(unique_ptr& return_obj, istringstream& command) { + unsigned tag; + if(!get_input(command, tag)) { + suanpan_error("new_sinh1d() requires a valid tag.\n"); + return; + } + + double elastic_modulus; + if(!get_input(command, elastic_modulus)) { + suanpan_error("new_sinh1d() requires a valid elastic modulus.\n"); + return; + } + + auto density = 0.; + if(command.eof()) suanpan_debug("new_sinh1d() assumes zero density.\n"); + else if(!get_input(command, density)) { + suanpan_error("new_sinh1d() requires a valid density.\n"); + return; + } + + return_obj = make_unique(tag, elastic_modulus, density); +} + +void new_tanh1d(unique_ptr& return_obj, istringstream& command) { + unsigned tag; + if(!get_input(command, tag)) { + suanpan_error("new_tanh1d() requires a valid tag.\n"); + return; + } + + double elastic_modulus; + if(!get_input(command, elastic_modulus)) { + suanpan_error("new_tanh1d() requires a valid elastic modulus.\n"); + return; + } + + auto density = 0.; + if(command.eof()) suanpan_debug("new_tanh1d() assumes zero density.\n"); + else if(!get_input(command, density)) { + suanpan_error("new_tanh1d() requires a valid density.\n"); + return; + } + + return_obj = make_unique(tag, elastic_modulus, density); +} + void new_elastic1d(unique_ptr& return_obj, istringstream& command) { unsigned tag; if(!get_input(command, tag)) { @@ -3312,6 +3358,8 @@ int create_new_material(const shared_ptr& domain, istringstream& com else if(is_equal(material_id, "CoulombFriction")) new_coulombfriction(new_material, command); else if(is_equal(material_id, "Dhakal")) new_dhakal(new_material, command); else if(is_equal(material_id, "DafaliasManzari")) new_dafaliasmanzari(new_material, command); + else if(is_equal(material_id, "Sinh1D")) new_sinh1d(new_material, command); + else if(is_equal(material_id, "Tanh1D")) new_tanh1d(new_material, command); else if(is_equal(material_id, "Elastic1D")) new_elastic1d(new_material, command); else if(is_equal(material_id, "Elastic2D")) new_elastic2d(new_material, command); else if(is_equal(material_id, "Elastic3D")) new_isotropicelastic3d(new_material, command); diff --git a/Option.cmake b/Option.cmake index 18686deb9..4948d41f3 100644 --- a/Option.cmake +++ b/Option.cmake @@ -138,12 +138,19 @@ endif () if (USE_HDF5) add_compile_definitions(SUANPAN_HDF5) - include_directories(Include/hdf5) - include_directories(Include/hdf5-${SP_EXTERNAL_LIB_PATH}) - if (COMPILER_IDENTIFIER MATCHES "vs") - link_libraries(libhdf5_hl libhdf5) + if (HAVE_VTK) + string(REGEX REPLACE "/lib/cmake/vtk" "/include/vtk" VTK_INCLUDE ${VTK_DIR}) + include_directories(${VTK_INCLUDE}/vtkhdf5) + include_directories(${VTK_INCLUDE}/vtkhdf5/src) + include_directories(${VTK_INCLUDE}/vtkhdf5/hl/src) else () - link_libraries(hdf5_hl hdf5) + include_directories(Include/hdf5) + include_directories(Include/hdf5-${SP_EXTERNAL_LIB_PATH}) + if (COMPILER_IDENTIFIER MATCHES "vs") + link_libraries(libhdf5_hl libhdf5) + else () + link_libraries(hdf5_hl hdf5) + endif () endif () else () add_compile_definitions(ARMA_DONT_USE_HDF5) diff --git a/README.md b/README.md index d3bfc1687..18a96f5f3 100644 --- a/README.md +++ b/README.md @@ -109,13 +109,33 @@ It is also possible to use [Scoop](https://scoop.sh/) to install the package. ### Linux -Linux's users are recommended to obtain the binaries via snap. The snap supports visualization via VTK and uses Intel -MKL for linear algebra. +Linux's users are recommended to obtain the binaries via snap or flatpak. + +#### Snap + +The snap supports visualization via VTK and uses Intel MKL for linear algebra. [![Get it from the Snap Store](https://snapcraft.io/static/images/badges/en/snap-store-black.svg)](https://snapcraft.io/suanpan) [![asciicast](https://asciinema.org/a/491330.svg)](https://asciinema.org/a/491330) +#### Flatpak + +Flatpak is also available if preferred. + +Download on Flathub + +```bash +# add repo +flatpak remote-add --if-not-exists flathub https://flathub.org/repo/flathub.flatpakrepo +# install +flatpak install flathub io.github.tlcfem.suanPan +# define alias +echo "alias suanpan=\"flatpak run io.github.tlcfem.suanPan\"" >> ~/.bashrc +``` + +#### Installation Packages + Alternatively, download the RPM (Fedora 35) or DEB (Ubuntu 22.04) package from the release page. The packages may not be compatible with older distributions (due to different version of `libstdc++`). It is also possible to compile the package via docker, check the dockerfiles under the `Script` folder, for any questions please open an issue. @@ -173,7 +193,7 @@ to `~/.local/share/applications`. Additional libraries used in **suanPan** are listed as follows. -- [**ARPACK**](https://www.caam.rice.edu/software/ARPACK/) version 0.96 +- [**ARPACK**](https://github.com/opencollab/arpack-ng) - [**SPIKE**](http://www.spike-solver.org/) version 1.0 - [**FEAST**](http://www.feast-solver.org/) version 4.0 - [**SuperLU**](https://portal.nersc.gov/project/sparse/superlu/) version 5.3.0 and [**SuperLU MT**](https://portal.nersc.gov/project/sparse/superlu/) version 3.1 @@ -184,10 +204,10 @@ Additional libraries used in **suanPan** are listed as follows. - [**METIS**](http://glaros.dtc.umn.edu/gkhome/metis/metis/overview) version 5.1.0 - [**VTK**](https://vtk.org/) version 9.2.2 - [**CUDA**](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/) version 11.7 -- [**Armadillo**](http://arma.sourceforge.net/) version 11.0 +- [**Armadillo**](http://arma.sourceforge.net/) version 11.4 - [**ensmallen**](https://ensmallen.org/) version 2.19.0 -- [**oneMKL**](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl.html) version 2022.2.0 -- [**Catch2**](https://github.com/catchorg/Catch2) version 2.13.9 +- [**oneMKL**](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onemkl.html) version 2023.0.0 +- [**Catch2**](https://github.com/catchorg/Catch2) version 2.13.10 - **thread_pool** abridged version of [`thread-pool`](https://github.com/bshoshany/thread-pool) Those libraries may depend on other libraries such as [zlib](https://zlib.net/) diff --git a/Recorder/AmplitudeRecorder.cpp b/Recorder/AmplitudeRecorder.cpp index 174b779ee..0ddf7a18a 100644 --- a/Recorder/AmplitudeRecorder.cpp +++ b/Recorder/AmplitudeRecorder.cpp @@ -29,11 +29,9 @@ void AmplitudeRecorder::initialize(const shared_ptr& D) { } void AmplitudeRecorder::record(const shared_ptr& D) { - if(1 != interval && counter++ != interval) return; + if(!if_perform_record()) return; - counter = 1; - - auto& current_time = D->get_factory()->get_current_time(); + const sp_d auto current_time = D->get_factory()->get_current_time(); auto& obj_tag = get_object_tag(); for(unsigned I = 0; I < obj_tag.n_elem; ++I) insert({{D->get(obj_tag(I))->get_amplitude(current_time)}}, I); diff --git a/Recorder/EigenRecorder.cpp b/Recorder/EigenRecorder.cpp index 4de8eaf48..d76c1bbe3 100644 --- a/Recorder/EigenRecorder.cpp +++ b/Recorder/EigenRecorder.cpp @@ -21,8 +21,8 @@ #include #ifdef SUANPAN_HDF5 -#include -#include +#include +#include #endif extern fs::path SUANPAN_OUTPUT; diff --git a/Recorder/ElementRecorder.cpp b/Recorder/ElementRecorder.cpp index 796c980ad..f9271509e 100644 --- a/Recorder/ElementRecorder.cpp +++ b/Recorder/ElementRecorder.cpp @@ -29,9 +29,7 @@ void ElementRecorder::initialize(const shared_ptr& D) { } void ElementRecorder::record(const shared_ptr& D) { - if(1 != interval && counter++ != interval) return; - - counter = 1; + if(!if_perform_record()) return; auto& obj_tag = get_object_tag(); diff --git a/Recorder/FrameRecorder.cpp b/Recorder/FrameRecorder.cpp index ff8d03ccd..52c9ff134 100644 --- a/Recorder/FrameRecorder.cpp +++ b/Recorder/FrameRecorder.cpp @@ -21,8 +21,8 @@ #include #ifdef SUANPAN_HDF5 -#include -#include +#include +#include #endif extern fs::path SUANPAN_OUTPUT; @@ -44,30 +44,28 @@ FrameRecorder::~FrameRecorder() { void FrameRecorder::record([[maybe_unused]] const shared_ptr& D) { #ifdef SUANPAN_HDF5 - if(1 == interval || counter++ == interval) { - counter = 1; + if(!if_perform_record()) return; - ostringstream group_name; - group_name << "/"; - group_name << D->get_factory()->get_current_time(); + ostringstream group_name; + group_name << "/"; + group_name << D->get_factory()->get_current_time(); - const auto group_id = H5Gcreate(file_id, group_name.str().c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + const auto group_id = H5Gcreate(file_id, group_name.str().c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - for(const auto& I : D->get_element_pool()) { - if(const auto data = I->record(get_variable_type()); !data.empty()) { - mat data_to_write(data[0].n_elem, data.size()); + for(const auto& I : D->get_element_pool()) { + if(const auto data = I->record(get_variable_type()); !data.empty()) { + mat data_to_write(data[0].n_elem, data.size()); - uword idx = 0; - for(const auto& J : data) data_to_write.col(idx++) = J; + uword idx = 0; + for(const auto& J : data) data_to_write.col(idx++) = J; - const hsize_t dimension[2] = {data_to_write.n_cols, data_to_write.n_rows}; + const hsize_t dimension[2] = {data_to_write.n_cols, data_to_write.n_rows}; - H5LTmake_dataset(group_id, std::to_string(I->get_tag()).c_str(), 2, dimension, H5T_NATIVE_DOUBLE, data_to_write.mem); - } + H5LTmake_dataset(group_id, std::to_string(I->get_tag()).c_str(), 2, dimension, H5T_NATIVE_DOUBLE, data_to_write.mem); } - - H5Gclose(group_id); } + + H5Gclose(group_id); #endif } diff --git a/Recorder/FrameRecorder.h b/Recorder/FrameRecorder.h index c51095da7..dab33283b 100644 --- a/Recorder/FrameRecorder.h +++ b/Recorder/FrameRecorder.h @@ -35,6 +35,7 @@ class FrameRecorder final : public Recorder { #ifdef SUANPAN_HDF5 hid_t file_id = 0; #endif + public: FrameRecorder(unsigned, // tag OutputType, // recorder type diff --git a/Recorder/GlobalMassRecorder.cpp b/Recorder/GlobalMassRecorder.cpp index e9163a47d..18b690d51 100644 --- a/Recorder/GlobalMassRecorder.cpp +++ b/Recorder/GlobalMassRecorder.cpp @@ -30,9 +30,7 @@ GlobalMassRecorder::GlobalMassRecorder(const unsigned T, const unsigned I, const : GlobalRecorder(T, OutputType::M, I, R, H) {} void GlobalMassRecorder::record(const shared_ptr& D) { - if(1 != interval && counter++ != interval) return; - - counter = 1; + if(!if_perform_record()) return; auto& W = D->get_factory(); auto& C = D->get_color_map(); diff --git a/Recorder/GlobalRecorder.cpp b/Recorder/GlobalRecorder.cpp index d42c7fa90..077252891 100644 --- a/Recorder/GlobalRecorder.cpp +++ b/Recorder/GlobalRecorder.cpp @@ -25,9 +25,7 @@ GlobalRecorder::GlobalRecorder(const unsigned T, const OutputType L, const unsig : Recorder(T, {0}, L, I, R, H) {} void GlobalRecorder::record(const shared_ptr& D) { - if(1 != interval && counter++ != interval) return; - - counter = 1; + if(!if_perform_record()) return; auto get_momentum_component = [&](const DOF C) { auto momentum = 0.; diff --git a/Recorder/GlobalStiffnessRecorder.cpp b/Recorder/GlobalStiffnessRecorder.cpp index 73ed1f12c..a5067d7c8 100644 --- a/Recorder/GlobalStiffnessRecorder.cpp +++ b/Recorder/GlobalStiffnessRecorder.cpp @@ -30,9 +30,7 @@ GlobalStiffnessRecorder::GlobalStiffnessRecorder(const unsigned T, const unsigne : GlobalRecorder(T, OutputType::K, I, R, H) {} void GlobalStiffnessRecorder::record(const shared_ptr& D) { - if(1 != interval && counter++ != interval) return; - - counter = 1; + if(!if_perform_record()) return; auto& W = D->get_factory(); auto& C = D->get_color_map(); diff --git a/Recorder/NodeRecorder.cpp b/Recorder/NodeRecorder.cpp index 8e0c44312..b395b63cb 100644 --- a/Recorder/NodeRecorder.cpp +++ b/Recorder/NodeRecorder.cpp @@ -30,9 +30,7 @@ void NodeRecorder::initialize(const shared_ptr& D) { } void NodeRecorder::record(const shared_ptr& D) { - if(1 != interval && counter++ != interval) return; - - counter = 1; + if(!if_perform_record()) return; auto& obj_tag = get_object_tag(); diff --git a/Recorder/Recorder.cpp b/Recorder/Recorder.cpp index 1a1fb1811..447235fd7 100644 --- a/Recorder/Recorder.cpp +++ b/Recorder/Recorder.cpp @@ -20,8 +20,8 @@ extern fs::path SUANPAN_OUTPUT; #ifdef SUANPAN_HDF5 -#include -#include +#include +#include #endif /** @@ -58,6 +58,8 @@ bool Recorder::if_hdf5() const { return use_hdf5; } bool Recorder::if_record_time() const { return record_time; } +bool Recorder::if_perform_record() { return 1 == interval || 0 == std::remainder(counter++, interval); } + void Recorder::insert(const double T) { time_pool.emplace_back(T); } void Recorder::insert(const std::vector& D, const unsigned I) { data_pool[I].emplace_back(D); } @@ -90,12 +92,12 @@ void Recorder::save() { auto max_size = 0llu; for(const auto& I : s_data_pool[0]) if(I.n_elem > max_size) max_size = I.n_elem; - mat data_to_write(s_data_pool.cbegin()->size() * max_size + 1, time_pool.size() + 1, fill::zeros); + mat data_to_write(s_data_pool.cbegin()->size() * max_size + 1, time_pool.size(), fill::zeros); for(size_t I = 0; I < time_pool.size(); ++I) { - data_to_write(0, I + 1) = time_pool[I]; + data_to_write(0, I) = time_pool[I]; unsigned L = 1; - for(const auto& J : s_data_pool[I]) for(unsigned K = 0; K < J.n_elem; ++K) data_to_write(L++, I + 1) = J[K]; + for(const auto& J : s_data_pool[I]) for(unsigned K = 0; K < J.n_elem; ++K) data_to_write(L++, I) = J[K]; } hsize_t dimension[2] = {data_to_write.n_cols, data_to_write.n_rows}; @@ -115,12 +117,12 @@ void Recorder::save() { auto max_size = 0llu; for(const auto& I : s_data_pool[0]) if(I.n_elem > max_size) max_size = I.n_elem; - mat data_to_write(s_data_pool.cbegin()->size() * max_size + 1, time_pool.size() + 1, fill::zeros); + mat data_to_write(s_data_pool.cbegin()->size() * max_size + 1, time_pool.size(), fill::zeros); for(size_t I = 0; I < time_pool.size(); ++I) { - data_to_write(0, I + 1) = time_pool[I]; + data_to_write(0, I) = time_pool[I]; unsigned L = 1; - for(const auto& J : s_data_pool[I]) for(unsigned K = 0; K < J.n_elem; ++K) data_to_write(L++, I + 1) = J[K]; + for(const auto& J : s_data_pool[I]) for(unsigned K = 0; K < J.n_elem; ++K) data_to_write(L++, I) = J[K]; } ostringstream dataset_name; @@ -135,12 +137,12 @@ void Recorder::save() { auto max_size = 0llu; for(const auto& I : s_data_pool[0]) if(I.n_elem > max_size) max_size = I.n_elem; - mat data_to_write(s_data_pool.cbegin()->size() * max_size + 1, time_pool.size() + 1, fill::zeros); + mat data_to_write(s_data_pool.cbegin()->size() * max_size + 1, time_pool.size(), fill::zeros); for(size_t I = 0; I < time_pool.size(); ++I) { - data_to_write(0, I + 1) = time_pool[I]; + data_to_write(0, I) = time_pool[I]; auto L = 1; - for(const auto& J : s_data_pool[I]) for(unsigned K = 0; K < J.n_elem; ++K) data_to_write(L++, I + 1) = J[K]; + for(const auto& J : s_data_pool[I]) for(unsigned K = 0; K < J.n_elem; ++K) data_to_write(L++, I) = J[K]; } ostringstream dataset_name; diff --git a/Recorder/Recorder.h b/Recorder/Recorder.h index edced548b..43af5cd00 100644 --- a/Recorder/Recorder.h +++ b/Recorder/Recorder.h @@ -43,7 +43,9 @@ class Recorder : public Tag { protected: const unsigned interval; - unsigned counter = 1; + unsigned counter = 0; + + bool if_perform_record(); public: Recorder(unsigned, // tag diff --git a/Recorder/SumRecorder.cpp b/Recorder/SumRecorder.cpp index f54912af2..02ce47449 100644 --- a/Recorder/SumRecorder.cpp +++ b/Recorder/SumRecorder.cpp @@ -32,9 +32,7 @@ void SumRecorder::initialize(const shared_ptr& D) { } void SumRecorder::record(const shared_ptr& D) { - if(1 != interval && counter++ != interval) return; - - counter = 1; + if(!if_perform_record()) return; auto& obj_tag = get_object_tag(); diff --git a/Recorder/VisualisationRecorder.h b/Recorder/VisualisationRecorder.h index 0549d5857..226c4d70e 100644 --- a/Recorder/VisualisationRecorder.h +++ b/Recorder/VisualisationRecorder.h @@ -44,6 +44,7 @@ class VisualisationRecorder final : public Recorder { void (*function_handler)(const shared_ptr&, vtkInfo) = nullptr; #endif + public: VisualisationRecorder(unsigned, // tag OutputType, // recorder type diff --git a/Resource/suanPan.rc b/Resource/suanPan.rc index cf0d4eb01..d9bc71393 100644 --- a/Resource/suanPan.rc +++ b/Resource/suanPan.rc @@ -51,8 +51,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 2,6,1,0 - PRODUCTVERSION 2,6,1,0 + FILEVERSION 2,7,0,0 + PRODUCTVERSION 2,7,0,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -69,12 +69,12 @@ BEGIN BEGIN VALUE "CompanyName", "TLCFEM" VALUE "FileDescription", "suanPan --- An Open Source, Parallel and Heterogeneous Finite Element Analysis Framework" - VALUE "FileVersion", "2.6.1.0" + VALUE "FileVersion", "2.7.0.0" VALUE "InternalName", "suanPan" VALUE "LegalCopyright", "Copyright (C) 2017-2022 Theodore Chang https://github.com/TLCFEM/suanPan" VALUE "OriginalFilename", "suanPan" VALUE "ProductName", "suanPan" - VALUE "ProductVersion", "2.6.1.0" + VALUE "ProductVersion", "2.7.0.0" END END BLOCK "VarFileInfo" diff --git a/Script/AlmaLinux.Dockerfile b/Script/AlmaLinux.Dockerfile index 441b51433..f2b67fb7d 100644 --- a/Script/AlmaLinux.Dockerfile +++ b/Script/AlmaLinux.Dockerfile @@ -2,8 +2,8 @@ FROM almalinux:9 as build RUN dnf upgrade --refresh -y && dnf install -y libglvnd-devel gcc g++ gfortran rpm-build rpm-devel rpmdevtools cmake wget git -RUN wget -q https://registrationcenter-download.intel.com/akdlm/irc_nas/18898/l_onemkl_p_2022.2.0.8748_offline.sh -RUN sh ./l_onemkl_p_2022.2.0.8748_offline.sh -a --silent --eula accept && rm ./l_onemkl_p_2022.2.0.8748_offline.sh +RUN wget -q https://registrationcenter-download.intel.com/akdlm/irc_nas/19138/l_onemkl_p_2023.0.0.25398_offline.sh +RUN sh ./l_onemkl_p_2023.0.0.25398_offline.sh -a --silent --eula accept && rm ./l_onemkl_p_2023.0.0.25398_offline.sh RUN mkdir vtk-build && cd vtk-build && \ wget -q https://www.vtk.org/files/release/9.2/VTK-9.2.2.tar.gz && tar xf VTK-9.2.2.tar.gz && \ diff --git a/Script/CompileAll.sh b/Script/CompileAll.sh index 882958f37..a4fb670b9 100755 --- a/Script/CompileAll.sh +++ b/Script/CompileAll.sh @@ -9,6 +9,6 @@ for folder in $(find . -maxdepth 1 -type d -name "cmake-build*"); do ( echo "Compiling $folder" cd "$folder" || exit - make -j"$(nproc)" + cmake --build . --target all -j "$(nproc)" ) done diff --git a/Script/Coverage.sh b/Script/Coverage.sh index b0bd033cc..a251fc889 100755 --- a/Script/Coverage.sh +++ b/Script/Coverage.sh @@ -33,7 +33,7 @@ cp ../Example/Material/CYCLE.txt . for file in $files; do echo "Processing $file ..." - ./suanPan -f "$file" >>$log_file + time ./suanPan -f "$file" >>$log_file done { diff --git a/Script/Fedora.Dockerfile b/Script/Fedora.Dockerfile index 195502ca8..dcdb88393 100644 --- a/Script/Fedora.Dockerfile +++ b/Script/Fedora.Dockerfile @@ -1,9 +1,9 @@ -FROM fedora:35 as build +FROM fedora:37 as build RUN dnf upgrade --refresh -y && dnf install -y libglvnd-devel gcc g++ gfortran rpm-build rpm-devel rpmdevtools cmake wget git -RUN wget -q https://registrationcenter-download.intel.com/akdlm/irc_nas/18898/l_onemkl_p_2022.2.0.8748_offline.sh -RUN sh ./l_onemkl_p_2022.2.0.8748_offline.sh -a --silent --eula accept && rm ./l_onemkl_p_2022.2.0.8748_offline.sh +RUN wget -q https://registrationcenter-download.intel.com/akdlm/irc_nas/19138/l_onemkl_p_2023.0.0.25398_offline.sh +RUN sh ./l_onemkl_p_2023.0.0.25398_offline.sh -a --silent --eula accept && rm ./l_onemkl_p_2023.0.0.25398_offline.sh RUN mkdir vtk-build && cd vtk-build && \ wget -q https://www.vtk.org/files/release/9.2/VTK-9.2.2.tar.gz && tar xf VTK-9.2.2.tar.gz && \ @@ -18,11 +18,10 @@ RUN cd suanPan/build && cp suanPan*.rpm / && \ cd suanPan-linux-mkl-vtk/bin && ./suanPan.sh -v && \ cd / && ls -al && rm -r suanPan -FROM fedora:35 as runtime +FROM fedora:37 as runtime COPY --from=build /suanPan*.rpm /suanPan*.rpm RUN dnf upgrade --refresh -y && dnf install ./suanPan*.rpm -y RUN suanPan -v - diff --git a/Script/Ubuntu.Dockerfile b/Script/Ubuntu.Dockerfile index 011db825f..3313d304b 100644 --- a/Script/Ubuntu.Dockerfile +++ b/Script/Ubuntu.Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:focal as build +FROM ubuntu:jammy as build ARG DEBIAN_FRONTEND=noninteractive @@ -14,9 +14,7 @@ RUN apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB RUN echo "deb https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list RUN rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB -RUN apt-get update -y && apt-get install -y gcc-10 g++-10 gfortran-10 cmake git intel-oneapi-mkl-devel libxt-dev freeglut3-dev libxcursor-dev file dpkg-dev - -RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gfortran gfortran /usr/bin/gfortran-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 +RUN apt-get update -y && apt-get install -y gcc g++ gfortran cmake git intel-oneapi-mkl-devel libxt-dev freeglut3-dev libxcursor-dev file dpkg-dev RUN mkdir vtk-build && cd vtk-build && \ wget -q https://www.vtk.org/files/release/9.2/VTK-9.2.2.tar.gz && tar xf VTK-9.2.2.tar.gz && \ @@ -31,7 +29,7 @@ RUN cd suanPan/build && cp suanPan*.deb / && \ cd suanPan-linux-mkl-vtk/bin && ./suanPan.sh -v && \ cd / && ls -al && rm -r suanPan -FROM ubuntu:focal as runtime +FROM ubuntu:jammy as runtime COPY --from=build /suanPan*.deb /suanPan*.deb diff --git a/Section/SectionShell/SectionShell.cpp b/Section/SectionShell/SectionShell.cpp index 42e6ffb9d..18957a9f8 100644 --- a/Section/SectionShell/SectionShell.cpp +++ b/Section/SectionShell/SectionShell.cpp @@ -105,10 +105,7 @@ int SectionShell::update_trial_status(const vec& m_strain, const vec& p_strain, return update_trial_status(m_strain, p_strain); } -vector SectionShell::record(const OutputType&) { - vector data; - return data; -} +vector SectionShell::record(OutputType) { return {}; } unique_ptr suanpan::make_copy(const shared_ptr& S) { return S->get_copy(); } diff --git a/Section/SectionShell/SectionShell.h b/Section/SectionShell/SectionShell.h index a4009c01b..99537edce 100644 --- a/Section/SectionShell/SectionShell.h +++ b/Section/SectionShell/SectionShell.h @@ -115,7 +115,7 @@ class SectionShell : protected SectionShellData, public Tag { virtual unique_ptr get_copy() = 0; - virtual double get_parameter(ParameterType = ParameterType::NONE); + virtual double get_parameter(ParameterType); int update_incre_status(double, double); int update_incre_status(double, double, double, double); @@ -131,7 +131,7 @@ class SectionShell : protected SectionShellData, public Tag { virtual int commit_status() = 0; virtual int reset_status() = 0; - virtual vector record(const OutputType&); + virtual vector record(OutputType); }; namespace suanpan { diff --git a/Solver/Arnoldi.cpp b/Solver/Arnoldi.cpp index 398e952e6..eb5a13674 100644 --- a/Solver/Arnoldi.cpp +++ b/Solver/Arnoldi.cpp @@ -21,9 +21,10 @@ #include #include -Arnoldi::Arnoldi(const unsigned T, const unsigned N) +Arnoldi::Arnoldi(const unsigned T, const unsigned N, const char TP) : Solver(T) - , eigen_num(N) {} + , eigen_num(N) + , eigen_type(TP) {} int Arnoldi::initialize() { if(get_integrator() == nullptr) { @@ -36,7 +37,7 @@ int Arnoldi::initialize() { int Arnoldi::analyze() { auto& G = get_integrator(); - const auto& D = G->get_domain().lock(); + const auto& D = G->get_domain(); auto& W = D->get_factory(); if(SUANPAN_SUCCESS != G->process_modifier()) return SUANPAN_FAIL; @@ -47,7 +48,11 @@ int Arnoldi::analyze() { // if(SUANPAN_SUCCESS != G->process_load()) return SUANPAN_FAIL; if(SUANPAN_SUCCESS != G->process_constraint()) return SUANPAN_FAIL; - return eig_solve(get_eigenvalue(W), get_eigenvector(W), W->get_stiffness(), W->get_mass(), eigen_num, "SM"); + const shared_ptr t_mass = W->get_mass()->make_copy(); + const auto factor = 1E-12 * t_mass->max(); + for(auto I = 0llu; I < t_mass->n_rows; ++I) t_mass->at(I, I) += factor; + + return eig_solve(get_eigenvalue(W), get_eigenvector(W), W->get_stiffness(), t_mass, eigen_num, 'L' == eigen_type ? "LM" : "SM"); } void Arnoldi::print() { suanpan_info("A solver using Arnoldi method.\n"); } diff --git a/Solver/Arnoldi.h b/Solver/Arnoldi.h index b9b7652b3..c988c8a3a 100644 --- a/Solver/Arnoldi.h +++ b/Solver/Arnoldi.h @@ -32,10 +32,12 @@ class Arnoldi final : public Solver { const unsigned eigen_num; + const char eigen_type; public: explicit Arnoldi(unsigned = 0, // unique solver tag - unsigned = 1 // number of eigenvalues + unsigned = 1, // number of eigenvalues + char = 'S' // type ); int initialize() override; diff --git a/Solver/BFGS.cpp b/Solver/BFGS.cpp index 30e616e27..8dd7d2351 100644 --- a/Solver/BFGS.cpp +++ b/Solver/BFGS.cpp @@ -31,18 +31,16 @@ int BFGS::analyze() { const auto& D = C->get_domain().lock(); auto& W = D->get_factory(); - const auto max_iteration = C->get_max_iteration(); - suanpan_info("current analysis time: %.5f.\n", W->get_trial_time()); + const auto max_iteration = C->get_max_iteration(); + // iteration counter unsigned counter = 0; - // ninja alias - auto& ninja = get_ninja(W); // lambda alias auto& aux_lambda = get_auxiliary_lambda(W); - vec residual; + vec samurai, residual; // clear container hist_ninja.clear(); @@ -55,12 +53,14 @@ int BFGS::analyze() { auto& border = W->get_auxiliary_stiffness(); mat right; if(SUANPAN_SUCCESS != G->solve(right, border)) return SUANPAN_FAIL; - if(!solve(aux_lambda, border.t() * right.head_rows(n_size), border.t() * ninja.head_rows(n_size) - G->get_auxiliary_residual())) return SUANPAN_FAIL; - ninja -= right * aux_lambda; + if(!solve(aux_lambda, border.t() * right.head_rows(n_size), border.t() * samurai.head(n_size) - G->get_auxiliary_residual())) return SUANPAN_FAIL; + samurai -= right * aux_lambda; return SUANPAN_SUCCESS; }; while(true) { + // update for nodes and elements + if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; // process modifiers if(SUANPAN_SUCCESS != G->process_modifier()) return SUANPAN_FAIL; // assemble resistance @@ -72,8 +72,10 @@ int BFGS::analyze() { // process loads and constraints if(SUANPAN_SUCCESS != G->process_load()) return SUANPAN_FAIL; if(SUANPAN_SUCCESS != G->process_constraint()) return SUANPAN_FAIL; + // indicate the global matrix has been assembled + G->set_matrix_assembled_switch(true); // solve the system and commit current displacement increment - if(SUANPAN_SUCCESS != G->solve(ninja, residual = G->get_force_residual())) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != G->solve(samurai, residual = G->get_force_residual())) return SUANPAN_FAIL; // deal with mpc if(SUANPAN_SUCCESS != adjust_for_mpc()) return SUANPAN_FAIL; } @@ -89,44 +91,48 @@ int BFGS::analyze() { // commit current factor after obtaining residual hist_factor.emplace_back(dot(hist_ninja.back(), hist_residual.back())); // copy current residual to ninja - ninja = residual; + samurai = residual; // perform two-step recursive loop // right side loop for(auto J = static_cast(hist_factor.size()) - 1; J >= 0; --J) { // compute and commit alpha - alpha.emplace_back(dot(hist_ninja[J], ninja) / hist_factor[J]); + alpha.emplace_back(dot(hist_ninja[J], samurai) / hist_factor[J]); // update ninja - ninja -= alpha.back() * hist_residual[J]; + samurai -= alpha.back() * hist_residual[J]; } // apply the Hessian from the factorization in the first iteration - ninja = G->solve(ninja); + samurai = G->solve(samurai); // deal with mpc if(SUANPAN_SUCCESS != adjust_for_mpc()) return SUANPAN_FAIL; // left side loop - for(size_t I = 0, J = hist_factor.size() - 1; I < hist_factor.size(); ++I, --J) ninja += (alpha[J] - dot(hist_residual[I], ninja) / hist_factor[I]) * hist_ninja[I]; + for(size_t I = 0, J = hist_factor.size() - 1; I < hist_factor.size(); ++I, --J) samurai += (alpha[J] - dot(hist_residual[I], samurai) / hist_factor[I]) * hist_ninja[I]; } // commit current displacement increment - hist_ninja.emplace_back(ninja); // complete + hist_ninja.emplace_back(samurai); // complete hist_residual.emplace_back(residual); // part of residual increment // avoid machine error accumulation - G->erase_machine_error(); + G->erase_machine_error(samurai); + + // exit if converged + if(C->is_converged(counter)) return G->sync_status(true); + // exit if maximum iteration is hit + if(++counter > max_iteration) return SUANPAN_FAIL; + // update internal variable - G->update_internal(ninja); + G->update_internal(samurai); // update trial status for factory - G->update_trial_displacement(ninja); + G->update_from_ninja(); // for tracking G->update_load(); // for tracking multiplier G->update_constraint(); - // update for nodes and elements - if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; - // exit if converged - if(C->is_converged()) return SUANPAN_SUCCESS; - // exit if maximum iteration is hit - if(++counter > max_iteration) return SUANPAN_FAIL; + // fast handling for linear elastic case + // sync status using newly computed increment across elements and nodes + // this may just call predictor or call corrector + if(D->get_attribute(ModalAttribute::LinearSystem)) return G->sync_status(false); // check if the maximum record number is hit (L-BFGS) if(counter > max_hist) { diff --git a/Solver/FEAST.cpp b/Solver/FEAST.cpp index 0d79e5fef..a11f7b65a 100644 --- a/Solver/FEAST.cpp +++ b/Solver/FEAST.cpp @@ -37,8 +37,8 @@ int FEAST::linear_solve(const shared_ptr& W) const { std::vector output(4, 0); std::vector input(4, 0.); - input[1] = 0.; // centre - input[2] = radius; // radius + input[1] = centre - radius; // centre + input[2] = centre + radius; // radius output[1] = static_cast(eigen_num); @@ -106,7 +106,7 @@ int FEAST::quadratic_solve(const shared_ptr& W) const { std::vector output(4, 0); std::vector input(4, 0.); - input[0] = radius; // centre + input[0] = centre; // centre input[1] = 0.; // centre input[2] = radius; // radius @@ -187,10 +187,11 @@ int FEAST::quadratic_solve(const shared_ptr& W) const { return SUANPAN_SUCCESS; } -FEAST::FEAST(const unsigned T, const unsigned N, const double R, const bool Q) +FEAST::FEAST(const unsigned T, const unsigned N, const double C, const double R, const bool Q) : Solver(T) , quadratic(Q) , eigen_num(N) + , centre(C) , radius(R) {} int FEAST::initialize() { @@ -201,7 +202,7 @@ int FEAST::initialize() { return SUANPAN_FAIL; } - auto& W = G->get_domain().lock()->get_factory(); + auto& W = G->get_domain()->get_factory(); if(const auto scheme = W->get_storage_scheme(); StorageScheme::SYMMPACK == scheme) { suanpan_error("FEAST solver does not support symmetric pack storage.\n"); @@ -219,7 +220,7 @@ int FEAST::initialize() { int FEAST::analyze() { auto& G = get_integrator(); - const auto& D = G->get_domain().lock(); + const auto& D = G->get_domain(); auto& W = D->get_factory(); if(SUANPAN_SUCCESS != G->process_modifier()) return SUANPAN_FAIL; diff --git a/Solver/FEAST.h b/Solver/FEAST.h index 3f237575b..114fb6195 100644 --- a/Solver/FEAST.h +++ b/Solver/FEAST.h @@ -39,13 +39,13 @@ class FEAST final : public Solver { const bool quadratic = false; const unsigned eigen_num; - const double radius; + const double centre, radius; [[nodiscard]] int linear_solve(const shared_ptr&) const; [[nodiscard]] int quadratic_solve(const shared_ptr&) const; public: - FEAST(unsigned, unsigned, double, bool); + FEAST(unsigned, unsigned, double, double, bool); int initialize() override; diff --git a/Solver/Integrator/BatheExplicit.cpp b/Solver/Integrator/BatheExplicit.cpp new file mode 100644 index 000000000..d1df7f840 --- /dev/null +++ b/Solver/Integrator/BatheExplicit.cpp @@ -0,0 +1,120 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "BatheExplicit.h" +#include +#include + +BatheExplicit::BatheExplicit(const unsigned T, const double R) + : ExplicitIntegrator(T) + , P((2. - std::sqrt(2. + 2. * R)) / (1. - R)) + , Q1((.5 - P) / P / (1. - P)) + , Q2(.5 - P * Q1) + , Q0(.5 - Q1 - Q2) {} + +bool BatheExplicit::has_corrector() const { return true; } + +void BatheExplicit::assemble_resistance() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + auto fa = std::async([&] { D->assemble_resistance(); }); + auto fb = std::async([&] { D->assemble_damping_force(); }); + auto fc = std::async([&] { D->assemble_inertial_force(); }); + + fa.get(); + fb.get(); + fc.get(); + + W->set_sushi(W->get_trial_resistance() + W->get_trial_damping_force() + W->get_trial_inertial_force()); +} + +void BatheExplicit::assemble_matrix() { get_domain()->assemble_trial_mass(); } + +void BatheExplicit::update_incre_time(double T) { + const auto& W = get_domain()->get_factory(); + update_parameter(T *= 2.); + W->update_incre_time(T * (FLAG::FIRST == step_flag ? P : 1. - P)); +} + +int BatheExplicit::update_trial_status() { + const auto& D = get_domain(); + + if(auto& W = D->get_factory(); FLAG::FIRST == step_flag) { + W->update_incre_velocity(A0 * W->get_current_acceleration()); + W->update_incre_displacement(A0 * W->get_current_velocity() + A1 * W->get_current_acceleration()); + } + else { + W->update_incre_velocity(A3 * W->get_current_acceleration()); + W->update_incre_displacement(A3 * W->get_current_velocity() + A4 * W->get_current_acceleration()); + } + + return D->update_trial_status(); +} + +int BatheExplicit::correct_trial_status() { + const auto& D = get_domain(); + + if(auto& W = D->get_factory(); FLAG::FIRST == step_flag) W->update_incre_velocity(A2 * W->get_incre_acceleration()); + else W->update_incre_velocity(A5 * W->get_pre_acceleration() + A6 * W->get_current_acceleration() + A7 * W->get_trial_acceleration()); + + return D->update_trial_status(); +} + +void BatheExplicit::commit_status() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + if(FLAG::FIRST == step_flag) { + step_flag = FLAG::SECOND; + set_time_step_switch(false); + } + else { + step_flag = FLAG::FIRST; + set_time_step_switch(true); + } + + W->commit_pre_displacement(); + W->commit_pre_velocity(); + W->commit_pre_acceleration(); + + ExplicitIntegrator::commit_status(); +} + +void BatheExplicit::clear_status() { + step_flag = FLAG::FIRST; + set_time_step_switch(true); + + ExplicitIntegrator::clear_status(); +} + +void BatheExplicit::update_parameter(const double NT) { + if(suanpan::approx_equal(DT, NT)) return; + + DT = NT; + + A0 = P * DT; + A2 = .5 * A0; + A1 = A0 * A2; + A3 = DT - A0; + A4 = .5 * A3 * A3; + A5 = Q0 * A3; + A6 = (.5 + Q1) * A3; + A7 = Q2 * A3; +} + +void BatheExplicit::print() { suanpan_info("A BatheExplicit solver.\n"); } diff --git a/Solver/Integrator/BatheExplicit.h b/Solver/Integrator/BatheExplicit.h new file mode 100644 index 000000000..d60419336 --- /dev/null +++ b/Solver/Integrator/BatheExplicit.h @@ -0,0 +1,68 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class BatheExplicit + * @brief A BatheExplicit class defines a solver using BatheExplicit algorithm. + * + * @author tlc + * @date 03/12/2022 + * @version 0.1.0 + * @file BatheExplicit.h + * @addtogroup Integrator + * @{ + */ + +#ifndef BATHEEXPLICIT_H +#define BATHEEXPLICIT_H + +#include "Integrator.h" + +class BatheExplicit final : public ExplicitIntegrator { + enum class FLAG { + FIRST, + SECOND + }; + + FLAG step_flag = FLAG::FIRST; + + const double P, Q1, Q2, Q0; + double DT{0.}, A0{0.}, A1{0.}, A2{0.}, A3{0.}, A4{0.}, A5{0.}, A6{0.}, A7{0.}; + +public: + BatheExplicit(unsigned, double); + + [[nodiscard]] bool has_corrector() const override; + + void assemble_resistance() override; + void assemble_matrix() override; + + void update_incre_time(double) override; + + int update_trial_status() override; + int correct_trial_status() override; + + void commit_status() override; + void clear_status() override; + + void update_parameter(double) override; + + void print() override; +}; + +#endif + +//! @} diff --git a/Solver/Integrator/BatheTwoStep.cpp b/Solver/Integrator/BatheTwoStep.cpp index c015cf526..7667415f8 100644 --- a/Solver/Integrator/BatheTwoStep.cpp +++ b/Solver/Integrator/BatheTwoStep.cpp @@ -18,10 +18,16 @@ #include "BatheTwoStep.h" #include #include -#include + +BatheTwoStep::BatheTwoStep(const unsigned T, const double R, const double G) + : ImplicitIntegrator(T) + , GM(G) + , Q1((R + 1) / (2. * GM * (R - 1) + 4)) + , Q2(.5 - GM * Q1) + , Q0(1. - Q1 - Q2) {} void BatheTwoStep::assemble_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_resistance(); }); @@ -36,7 +42,7 @@ void BatheTwoStep::assemble_resistance() { } void BatheTwoStep::assemble_matrix() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_trial_stiffness(); }); @@ -53,26 +59,32 @@ void BatheTwoStep::assemble_matrix() { t_stiff += W->get_geometry(); - t_stiff += FLAG::TRAP == step_flag ? C6 * W->get_mass() + C3 * W->get_damping() : C5 * W->get_mass() + C2 * W->get_damping(); + t_stiff += FLAG::TRAP == step_flag ? P3 * W->get_mass() + P2 * W->get_damping() : P9 * W->get_mass() + P8 * W->get_damping(); +} + +void BatheTwoStep::update_incre_time(double T) { + const auto& W = get_domain()->get_factory(); + update_parameter(T *= 2.); + W->update_incre_time(T * (FLAG::TRAP == step_flag ? GM : 1. - GM)); } int BatheTwoStep::update_trial_status() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); if(auto& W = D->get_factory(); FLAG::TRAP == step_flag) { - W->update_incre_acceleration(C6 * W->get_incre_displacement() - C4 * W->get_current_velocity() - 2. * W->get_current_acceleration()); - W->update_incre_velocity(C3 * W->get_incre_displacement() - 2. * W->get_current_velocity()); + W->update_trial_acceleration(P3 * W->get_incre_displacement() - P4 * W->get_current_velocity() - W->get_current_acceleration()); + W->update_trial_velocity(P2 * W->get_incre_displacement() - W->get_current_velocity()); } else { - W->update_trial_velocity(C2 * W->get_incre_displacement() + C1 * (W->get_pre_displacement() - W->get_current_displacement())); - W->update_trial_acceleration(C1 * W->get_pre_velocity() - C3 * W->get_current_velocity() + C2 * W->get_trial_velocity()); + W->update_trial_velocity(P8 * (W->get_trial_displacement() - W->get_pre_displacement()) - Q02 * W->get_pre_velocity() - Q12 * W->get_current_velocity()); + W->update_trial_acceleration(P8 * (W->get_trial_velocity() - W->get_pre_velocity()) - Q02 * W->get_pre_acceleration() - Q12 * W->get_current_acceleration()); } return D->update_trial_status(); } void BatheTwoStep::commit_status() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); if(FLAG::TRAP == step_flag) { @@ -86,72 +98,63 @@ void BatheTwoStep::commit_status() { W->commit_pre_displacement(); W->commit_pre_velocity(); + W->commit_pre_acceleration(); - Integrator::commit_status(); + ImplicitIntegrator::commit_status(); } void BatheTwoStep::clear_status() { step_flag = FLAG::TRAP; set_time_step_switch(true); - Integrator::clear_status(); + ImplicitIntegrator::clear_status(); } -/** - * \brief update acceleration and velocity for zero displacement increment - */ -void BatheTwoStep::update_compatibility() const { - const auto& D = get_domain().lock(); - auto& W = D->get_factory(); +vec BatheTwoStep::from_incre_velocity(const vec& incre_velocity, const uvec& encoding) { + auto& W = get_domain()->get_factory(); - if(FLAG::TRAP == step_flag) { - W->update_incre_acceleration(-C4 * W->get_current_velocity() - 2. * W->get_current_acceleration()); - W->update_incre_velocity(-2. * W->get_current_velocity()); - } - else { - W->update_trial_velocity(C1 * (W->get_pre_displacement() - W->get_current_displacement())); - W->update_trial_acceleration(C1 * W->get_pre_velocity() - C3 * W->get_current_velocity() + C2 * W->get_trial_velocity()); - } + return from_total_velocity(W->get_current_velocity()(encoding) + incre_velocity, encoding); +} - auto& trial_dsp = W->get_trial_displacement(); - auto& trial_vel = W->get_trial_velocity(); - auto& trial_acc = W->get_trial_acceleration(); +vec BatheTwoStep::from_incre_acceleration(const vec& incre_acceleration, const uvec& encoding) { + auto& W = get_domain()->get_factory(); - suanpan::for_all(D->get_node_pool(), [&](const shared_ptr& t_node) { t_node->update_trial_status(trial_dsp, trial_vel, trial_acc); }); + return from_total_acceleration(W->get_current_acceleration()(encoding) + incre_acceleration, encoding); } -vec BatheTwoStep::from_incre_velocity(const vec& incre_velocity, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); +vec BatheTwoStep::from_total_velocity(const vec& total_velocity, const uvec& encoding) { + auto& W = get_domain()->get_factory(); - vec total_displacement = W->get_current_displacement()(encoding); + if(FLAG::TRAP == step_flag) return W->get_current_displacement()(encoding) + P1 * (W->get_current_velocity()(encoding) + total_velocity); - if(FLAG::TRAP == step_flag) total_displacement += incre_velocity / C3 + C0 * W->get_current_velocity()(encoding); - else total_displacement += (incre_velocity + W->get_current_velocity()(encoding)) / C2 + (W->get_current_displacement()(encoding) - W->get_pre_displacement()(encoding)) / 3.; - - return total_displacement; + return W->get_pre_displacement()(encoding) + P5 * W->get_pre_velocity()(encoding) + P6 * W->get_current_velocity()(encoding) + P7 * total_velocity; } -vec BatheTwoStep::from_incre_acceleration(const vec& incre_acceleration, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); +vec BatheTwoStep::from_total_acceleration(const vec& total_acceleration, const uvec& encoding) { + auto& W = get_domain()->get_factory(); - vec total_displacement = W->get_current_displacement()(encoding); + vec total_velocity; + if(FLAG::TRAP == step_flag) total_velocity = W->get_current_velocity()(encoding) + P1 * (W->get_current_acceleration()(encoding) + total_acceleration); + else total_velocity = W->get_pre_velocity()(encoding) + P5 * W->get_pre_acceleration()(encoding) + P6 * W->get_current_acceleration()(encoding) + P7 * total_acceleration; - if(FLAG::TRAP == step_flag) total_displacement += incre_acceleration / C6 + C0 * W->get_current_velocity()(encoding) + 2. / C6 * W->get_current_acceleration()(encoding); - else total_displacement += (incre_acceleration + W->get_current_acceleration()(encoding)) / C5 + C3 / C5 * W->get_current_velocity()(encoding) - C1 / C5 * W->get_pre_velocity()(encoding) + (W->get_current_displacement()(encoding) - W->get_pre_displacement()(encoding)) / 3.; - - return total_displacement; + return from_total_velocity(total_velocity, encoding); } void BatheTwoStep::update_parameter(const double NT) { - if(suanpan::approx_equal(C0, NT)) return; - - C0 = NT; - C1 = .5 / C0; - C2 = 3. * C1; - C3 = 4. * C1; - C4 = 2. * C3; - C5 = C2 * C2; - C6 = C4 / C0; + if(suanpan::approx_equal(P0, NT)) return; + + P0 = NT; + + P1 = .5 * P0 * GM; + P2 = 1. / P1; + P3 = P2 * P2; + P4 = 2. * P2; + + P5 = P0 * Q0; + P6 = P0 * Q1; + P7 = P0 * Q2; + P8 = 1. / P7; + P9 = P8 * P8; } void BatheTwoStep::print() { suanpan_info("A BatheTwoStep solver.\n"); } diff --git a/Solver/Integrator/BatheTwoStep.h b/Solver/Integrator/BatheTwoStep.h index ec973d38b..ba38259a4 100644 --- a/Solver/Integrator/BatheTwoStep.h +++ b/Solver/Integrator/BatheTwoStep.h @@ -31,7 +31,7 @@ #include "Integrator.h" -class BatheTwoStep final : public Integrator { +class BatheTwoStep final : public ImplicitIntegrator { enum class FLAG { TRAP, EULER @@ -39,24 +39,31 @@ class BatheTwoStep final : public Integrator { FLAG step_flag = FLAG::TRAP; - double C0 = 0., C1 = 0., C2 = 0., C3 = 0., C4 = 0., C5 = 0., C6 = 0.; + const double GM; + + const double Q1, Q2, Q0, Q02 = Q0 / Q2, Q12 = Q1 / Q2; + + double P0{0.}, P1{0.}, P2{0.}, P3{0.}, P4{0.}, P5{0.}, P6{0.}, P7{0.}, P8{0.}, P9{0.}; public: - using Integrator::Integrator; + BatheTwoStep(unsigned, double, double); void assemble_resistance() override; void assemble_matrix() override; + void update_incre_time(double) override; + int update_trial_status() override; void commit_status() override; void clear_status() override; void update_parameter(double) override; - void update_compatibility() const override; vec from_incre_velocity(const vec&, const uvec&) override; vec from_incre_acceleration(const vec&, const uvec&) override; + vec from_total_velocity(const vec&, const uvec&) override; + vec from_total_acceleration(const vec&, const uvec&) override; void print() override; }; diff --git a/Solver/Integrator/BatheTwoStep.tex b/Solver/Integrator/BatheTwoStep.tex deleted file mode 100644 index 6e5b94a1a..000000000 --- a/Solver/Integrator/BatheTwoStep.tex +++ /dev/null @@ -1,46 +0,0 @@ -\documentclass[a4paper,10pt,fleqn]{article} -\usepackage[margin=20mm]{geometry} -\usepackage{mathpazo,amsmath,amsfonts,amssymb,hyperref} -\hypersetup{colorlinks} -\begin{document} -A leag-frog style algorithm is implemented. Please see the reference \href{https://doi.org/10.1016/j.compstruc.2006.09.004}{doi:10.1016/j.compstruc.2006.09.004} -\section*{The First Step} -The first step is computed by using the Trapezoidal rule. Thus -\begin{gather} -v_{n+1}=v_n+\dfrac{\Delta{}t}{2}\left(a_n+a_{n+1}\right),\\ -u_{n+1}=u_n+\dfrac{\Delta{}t}{2}\left(v_n+v_{n+1}\right). -\end{gather} -Then, -\begin{gather} -u_{n+1}=u_n+\dfrac{\Delta{}t}{2}\left(v_n+v_n+\dfrac{\Delta{}t}{2}\left(a_n+a_{n+1}\right)\right),\qquad -\Delta{}u=\Delta{}tv_n+\dfrac{\Delta{}t^2}{4}a_n+\dfrac{\Delta{}t^2}{4}a_{n+1}. -\end{gather} -One could obtain -\begin{gather} -a_{n+1}=\dfrac{4}{\Delta{}t^2}\Delta{}u-\dfrac{4}{\Delta{}t}v_n-a_n,\qquad -\Delta{}a=\dfrac{4}{\Delta{}t^2}\Delta{}u-\dfrac{4}{\Delta{}t}v_n-2a_n,\\ -v_{n+1}=\dfrac{2}{\Delta{}t}\Delta{}u-v_n,\qquad -\Delta{}v=\dfrac{2}{\Delta{}t}\Delta{}u-2v_n. -\end{gather} - -The effective stiffness is then -\begin{gather} -\bar{K}=K+\dfrac{2}{\Delta{}t}C+\dfrac{4}{\Delta{}t^2}M. -\end{gather} -\section*{The Second Step} -The second step is computed by using the backward Euler method. Thus -\begin{gather} -v_{n+2}=\dfrac{1}{2\Delta{}t}u_n-\dfrac{2}{\Delta{}t}u_{n+1}+\dfrac{3}{2\Delta{}t}u_{n+2},\\ -a_{n+2}=\dfrac{1}{2\Delta{}t}v_n-\dfrac{2}{\Delta{}t}v_{n+1}+\dfrac{3}{2\Delta{}t}v_{n+2}. -\end{gather} -Hence, -\begin{gather} -v_{n+2}=\dfrac{1}{2\Delta{}t}u_n-\dfrac{1}{2\Delta{}t}u_{n+1}+\dfrac{3}{2\Delta{}t}\Delta{}u,\\ -a_{n+2}=\dfrac{1}{2\Delta{}t}v_n-\dfrac{2}{\Delta{}t}v_{n+1}+\dfrac{3}{2\Delta{}t}v_{n+2}. -\end{gather} - -The effective stiffness is then -\begin{gather} -\bar{K}=K+\dfrac{3}{2\Delta{}t}C+\dfrac{9}{4\Delta{}t^2}M. -\end{gather} -\end{document} diff --git a/Solver/Integrator/CMakeLists.txt b/Solver/Integrator/CMakeLists.txt index ccbb45c84..71992eec2 100644 --- a/Solver/Integrator/CMakeLists.txt +++ b/Solver/Integrator/CMakeLists.txt @@ -1,12 +1,16 @@ set(Integrator + Integrator/BatheExplicit.cpp Integrator/BatheTwoStep.cpp Integrator/GeneralizedAlpha.cpp + Integrator/GeneralizedAlphaExplicit.cpp Integrator/GSSSS.cpp Integrator/Integrator.cpp Integrator/LeeNewmark.cpp Integrator/LeeNewmarkBase.cpp Integrator/LeeNewmarkFull.cpp Integrator/Newmark.cpp + Integrator/OALTS.cpp Integrator/RayleighNewmark.cpp + Integrator/Tchamwa.cpp Integrator/WilsonPenzienNewmark.cpp ) diff --git a/Solver/Integrator/GSSSS.cpp b/Solver/Integrator/GSSSS.cpp index bad9562af..754347e04 100644 --- a/Solver/Integrator/GSSSS.cpp +++ b/Solver/Integrator/GSSSS.cpp @@ -18,16 +18,15 @@ #include "GSSSS.h" #include #include -#include GSSSS::GSSSS(const unsigned T) - : Integrator(T) + : ImplicitIntegrator(T) , L1(1.) , L2(.5) , L4(1.) {} void GSSSS::assemble_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_resistance(); }); @@ -38,11 +37,11 @@ void GSSSS::assemble_resistance() { fb.get(); fc.get(); - W->set_sushi(W->get_trial_resistance() + W->get_trial_damping_force() + W->get_trial_inertial_force()); + W->set_sushi(W->get_current_resistance() + W3G3 / L3 * W->get_incre_resistance() + W->get_current_damping_force() + W2G5 / L5 * W->get_incre_damping_force() + W->get_current_inertial_force() + W1G6 * W->get_incre_inertial_force()); } void GSSSS::assemble_matrix() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_trial_stiffness(); }); @@ -55,11 +54,17 @@ void GSSSS::assemble_matrix() { fc.get(); fd.get(); - W->get_stiffness() += W->get_geometry() + XCVD * W->get_damping() + XCAD * W->get_mass(); + W->get_stiffness() += W->get_geometry() + XV * W->get_damping() + XA * W->get_mass(); } +vec GSSSS::get_force_residual() { return XD * ImplicitIntegrator::get_force_residual(); } + +vec GSSSS::get_displacement_residual() { return XD * ImplicitIntegrator::get_displacement_residual(); } + +sp_mat GSSSS::get_reference_load() { return XD * ImplicitIntegrator::get_reference_load(); } + int GSSSS::process_load() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -67,7 +72,7 @@ int GSSSS::process_load() { W->update_trial_time((1. - W1) * current_time + W1 * trial_time); - const auto code = Integrator::process_load(); + const auto code = ImplicitIntegrator::process_load(); W->update_trial_time(trial_time); @@ -75,7 +80,7 @@ int GSSSS::process_load() { } int GSSSS::process_constraint() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -83,7 +88,7 @@ int GSSSS::process_constraint() { W->update_trial_time((1. - W1) * current_time + W1 * trial_time); - const auto code = Integrator::process_constraint(); + const auto code = ImplicitIntegrator::process_constraint(); W->update_trial_time(trial_time); @@ -91,7 +96,7 @@ int GSSSS::process_constraint() { } int GSSSS::process_load_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -99,7 +104,7 @@ int GSSSS::process_load_resistance() { W->update_trial_time((1. - W1) * current_time + W1 * trial_time); - const auto code = Integrator::process_load_resistance(); + const auto code = ImplicitIntegrator::process_load_resistance(); W->update_trial_time(trial_time); @@ -107,7 +112,7 @@ int GSSSS::process_load_resistance() { } int GSSSS::process_constraint_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -115,7 +120,7 @@ int GSSSS::process_constraint_resistance() { W->update_trial_time((1. - W1) * current_time + W1 * trial_time); - const auto code = Integrator::process_constraint_resistance(); + const auto code = ImplicitIntegrator::process_constraint_resistance(); W->update_trial_time(trial_time); @@ -123,68 +128,43 @@ int GSSSS::process_constraint_resistance() { } int GSSSS::update_trial_status() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); - W->update_trial_velocity_by(XCVD * W->get_ninja()); - W->update_trial_acceleration_by(XCAD * W->get_ninja()); + W->update_incre_acceleration(C0 * W->get_incre_displacement() + C1 * W->get_current_velocity() + C2 * W->get_current_acceleration()); + W->update_incre_velocity(C3 * W->get_current_acceleration() + C4 * W->get_incre_acceleration()); return D->update_trial_status(); } -void GSSSS::stage_status() { - const auto& D = get_domain().lock(); - auto& W = D->get_factory(); - - W->update_incre_acceleration(W->get_incre_acceleration() / W1G6); - W->update_incre_velocity(L4 * DT * W->get_current_acceleration() + L5 * DT * W->get_incre_acceleration()); - W->update_incre_displacement(L1 * DT * W->get_current_velocity() + L2 * DT * DT * W->get_current_acceleration() + L3 * DT * DT * W->get_incre_acceleration()); - - // since iterative result does not equal to committed result - // need to sync with elements and nodes - [[maybe_unused]] const auto code = D->update_trial_status(); - - Integrator::stage_status(); -} - void GSSSS::update_parameter(const double NT) { if(suanpan::approx_equal(DT, NT)) return; DT = NT; - XPV3 = (W1G4 - W2G5 * L2 / L3) * DT; - XPA2 = -W1G6 * L1 / L3 / DT; - XCVD = W2G5 / W3G3 / DT; - XCAD = W1G6 / W3G3 / DT / DT; -} - -void GSSSS::update_compatibility() const { - const auto& D = get_domain().lock(); - auto& W = D->get_factory(); + const auto L3T = 1. / L3 / DT; - auto fb = std::async([&] { W->update_trial_velocity(XPV2 * W->get_current_velocity() + XPV3 * W->get_current_acceleration()); }); - auto fc = std::async([&] { W->update_trial_acceleration(XPA2 * W->get_current_velocity() + XPA3 * W->get_current_acceleration()); }); + C0 = L3T / DT; + C1 = -L1 * L3T; + C2 = -L2 / L3; + C3 = L4 * DT; + C4 = L5 * DT; - fb.get(); - fc.get(); - - auto& trial_dsp = W->get_trial_displacement(); - auto& trial_vel = W->get_trial_velocity(); - auto& trial_acc = W->get_trial_acceleration(); - - suanpan::for_all(D->get_node_pool(), [&](const shared_ptr& t_node) { t_node->update_trial_status(trial_dsp, trial_vel, trial_acc); }); + XD = L3 / W3G3; + XV = W2G5 / W3G3 / DT; + XA = W1G6 / W3G3 / DT / DT; } vec GSSSS::from_incre_velocity(const vec& incre_velocity, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); - return W->get_current_displacement()(encoding) + W3G3 * L1 / L3 * DT * W->get_current_velocity()(encoding) + W3G3 * (L2 / L3 - L4 / L5) * DT * DT * W->get_current_acceleration()(encoding) + W3G3 / L5 * DT * incre_velocity; + return from_incre_acceleration(incre_velocity / C4 - C3 / C4 * W->get_current_acceleration()(encoding), encoding); } vec GSSSS::from_incre_acceleration(const vec& incre_acceleration, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); - return W->get_current_displacement()(encoding) + W3G3 * L1 / L3 * DT * W->get_current_velocity()(encoding) + W3G3 * L2 / L3 * DT * DT * W->get_current_acceleration()(encoding) + W3G3 * DT * DT * incre_acceleration; + return incre_acceleration / C0 - C1 / C0 * W->get_current_velocity()(encoding) - C2 / C0 * W->get_current_acceleration()(encoding) + W->get_current_displacement()(encoding); } void GSSSS::print() { suanpan_info("A time integrator using the GSSSS algorithm.\n"); } @@ -193,17 +173,11 @@ template<> void GSSSS::generate_constants(const double R3, const double L3 = 1. / (1. + R1) / (1. + R2); L5 = .5 * (3. + R1 + R2 - R1 * R2) * L3; - W1G1 = 1. / (1. + R3); - W2G2 = .5 * W1G1; - W3G3 = L3 * W1G1; - W1G4 = W1G1; - W2G5 = L5 * W1G1; - W1G6 = (2. + R1 + R2 + R3 - R1 * R2 * R3) * W3G3; - - W1 = W1G1; + W1 = 1. / (1. + R3); - XPV2 = 1. - W2G5 * L1 / L3; - XPA3 = 1. - W1G6 * L2 / L3; + W3G3 = L3 * W1; + W2G5 = L5 * W1; + W1G6 = (2. + R1 + R2 + R3 - R1 * R2 * R3) * W3G3; } GSSSSU0::GSSSSU0(const unsigned T, vec&& R) @@ -216,13 +190,6 @@ template<> void GSSSS::generate_constants(const double R3, const double L3 = .5 / (1. + R3); L5 = 2. * L3; - W2G2 = 1. / (1. + R1) / (1. + R2); - W3G3 = W2G2 / (1. + R3); - W1G1 = .5 * (3. + R1 + R2 - R1 * R2) * W2G2; - W1G4 = W1G1; - W2G5 = 2. * W3G3; - W1G6 = (2. + R1 + R2 + R3 - R1 * R2 * R3) * W3G3; - const auto T0 = 9. - 11. * R1 - 11 * R2 + 19. * R1 * R2; const auto T1 = -30. * (3. - 4. * R1 - 4. * R2 + 6. * R1 * R2); const auto T2 = 7.5 * (25. - 37. * R1 - 37. * R2 + 53. * R1 * R2); @@ -230,8 +197,9 @@ template<> void GSSSS::generate_constants(const double R3, const double W1 = (T0 / 2. + T1 / 3. + T2 / 4. + T3 / 5.) / (T0 + T1 / 2. + T2 / 3. + T3 / 4.); - XPV2 = 1. - W2G5 * L1 / L3; - XPA3 = 1. - W1G6 * L2 / L3; + W3G3 = 1. / (1. + R1) / (1. + R2) / (1. + R3); + W2G5 = 2. * W3G3; + W1G6 = (2. + R1 + R2 + R3 - R1 * R2 * R3) * W3G3; } GSSSSV0::GSSSSV0(const unsigned T, vec&& R) @@ -244,17 +212,11 @@ template<> void GSSSS::generate_constants(const double R, double, L3 = .5 / (1. + R); L5 = 2. * L3; - W1G1 = L5; - W2G2 = L3; - W3G3 = W2G2 * W1G1; - W1G4 = W1G1; - W2G5 = W1G1 * W1G1; - W1G6 = (3. + 2. * R - R * R) * W3G3; - - W1 = W1G1; + W1 = L5; - XPV2 = 1. - W2G5 * L1 / L3; - XPA3 = 1. - W1G6 * L2 / L3; + W3G3 = L3 * L5; + W2G5 = L5 * L5; + W1G6 = (1. + R) * (3. - R) * W3G3; } GSSSSOptimal::GSSSSOptimal(const unsigned T, double R) diff --git a/Solver/Integrator/GSSSS.h b/Solver/Integrator/GSSSS.h index 7a421770d..399a68bf0 100644 --- a/Solver/Integrator/GSSSS.h +++ b/Solver/Integrator/GSSSS.h @@ -35,16 +35,16 @@ #include "Integrator.h" -class GSSSS : public Integrator { +class GSSSS : public ImplicitIntegrator { protected: const double L1, L2, L4; double L3 = 0., L5 = 0.; - double W1 = 0., W1G1 = 0., W2G2 = 0., W3G3 = 0., W1G4 = 0., W2G5 = 0., W1G6 = 0.; + double W1 = 0., W3G3 = 0., W2G5 = 0., W1G6 = 0.; double DT = 0.; - double XPV2 = 0., XPV3 = 0., XPA2 = 0., XPA3 = 0., XCVD = 0., XCAD = 0.; + double C0{0.}, C1{0.}, C2{0.}, C3{0.}, C4{0.}, XD{0.}, XV{0.}, XA{0.}; // ReSharper disable once CppMemberFunctionMayBeStatic template void generate_constants(double, double, double) { throw invalid_argument("need a proper scheme"); } @@ -55,6 +55,10 @@ class GSSSS : public Integrator { void assemble_resistance() override; void assemble_matrix() override; + vec get_force_residual() override; + vec get_displacement_residual() override; + sp_mat get_reference_load() override; + [[nodiscard]] int process_load() override; [[nodiscard]] int process_constraint() override; [[nodiscard]] int process_load_resistance() override; @@ -62,10 +66,7 @@ class GSSSS : public Integrator { int update_trial_status() override; - void stage_status() override; - void update_parameter(double) override; - void update_compatibility() const override; vec from_incre_velocity(const vec&, const uvec&) override; vec from_incre_acceleration(const vec&, const uvec&) override; diff --git a/Solver/Integrator/GSSSS.tex b/Solver/Integrator/GSSSS.tex new file mode 100644 index 000000000..8730c2844 --- /dev/null +++ b/Solver/Integrator/GSSSS.tex @@ -0,0 +1,47 @@ +\documentclass[a4paper,10pt,fleqn]{article} +\usepackage[margin=20mm]{geometry} +\usepackage{mathpazo,amsmath,amsfonts,amssymb} +\newcommand{\ddfrac}[2]{\dfrac{\mathrm{d}~#1}{\mathrm{d}~#2}} +\newcommand{\dt}{\Delta{}t} +\newcommand{\LM}{\varLambda_6W_1} +\newcommand{\LC}{\varLambda_5W_2} +\newcommand{\LK}{\varLambda_3W_3} +\begin{document} +\title{GSSSS}\date{} +\maketitle +\section{Integration} +The GSSSS method assumes that the displacement $d$ and the velocity $v$ are integrated as such, +\begin{gather}\label{EQ1} +d_{n+1}=d_n+\lambda_1\dt{}v_n+\lambda_2\dt^2a_n+\lambda_3\dt^2\left(a_{n+1}-a_n\right),\\ +v_{n+1}=v_n+\lambda_4\dt{}a_n+\lambda_5\dt\left(a_{n+1}-a_n\right), +\end{gather} +where $\lambda_n$ are parameters associated with spectral radii. +\section{Updating} +The incremental form is then +\begin{gather} +\Delta{}a=\dfrac{1}{\lambda_3\dt^2}\Delta{}d-\dfrac{\lambda_1}{\lambda_3\dt}v_n-\dfrac{\lambda_2}{\lambda_3}a_n,\\ +\Delta{}v=\lambda_4\dt{}a_n+\lambda_5\dt\Delta{}a, +\end{gather} +Thus, according to chain rule +\begin{gather} +\ddfrac{a_{n+1}}{d_{n+1}}=\dfrac{1}{\lambda_3\dt^2},\qquad{}\ddfrac{v_{n+1}}{d_{n+1}}=\dfrac{\lambda_5}{\lambda_3\dt}. +\end{gather} +\section{EOM} +The EOM is expressed at somewhere between $t_n$ and $t_{n+1}$. +\begin{multline} +M\left(\left(1-\LM\right)a_n+\LM{}a_{n+1}\right)+ +C\left(\left(1-\dfrac{\LC}{\lambda_5}\right)v_n+\dfrac{\LC}{\lambda_5}v_{n+1}\right)+\\ +K\left(\left(1-\dfrac{\LK}{\lambda_3}\right)d_n+\dfrac{\LK}{\lambda_3}d_{n+1}\right)= +\left(1-W_1\right)F_n+W_1F_{n+1} +\end{multline} +\section{Effective Stiffness} +The effective stiffness can be computed as +\begin{gather} +\bar{K}=\dfrac{\LM}{\lambda_3\dt^2}M+\dfrac{\LC}{\lambda_3\dt}C+\dfrac{\LK}{\lambda_3}K +\end{gather} +with $\Delta{}d=d_{n+1}-d_n$ be the unknown variable. Once converges, +\begin{gather} +\Delta{}a=\dfrac{1}{\lambda_3\dt^2}\Delta{}d-\dfrac{\lambda_1}{\lambda_3\dt}v_n-\dfrac{\lambda_2}{\lambda_3}a_n,\\ +\Delta{}v=\lambda_4\dt{}a_n+\lambda_5\dt\Delta{}a, +\end{gather} +\end{document} \ No newline at end of file diff --git a/Solver/Integrator/GeneralizedAlpha.cpp b/Solver/Integrator/GeneralizedAlpha.cpp index 6aeeae093..1cfc034e4 100644 --- a/Solver/Integrator/GeneralizedAlpha.cpp +++ b/Solver/Integrator/GeneralizedAlpha.cpp @@ -18,10 +18,9 @@ #include "GeneralizedAlpha.h" #include #include -#include GeneralizedAlpha::GeneralizedAlpha(const unsigned T, const double R) - : Integrator(T) + : ImplicitIntegrator(T) , alpha_f(R / (R + 1.)) , alpha_m((2. * R - 1.) / (R + 1.)) , gamma(.5 - (R - 1.) / (R + 1.)) @@ -33,7 +32,7 @@ GeneralizedAlpha::GeneralizedAlpha(const unsigned T, const double R) , F9(-.5 / beta) {} GeneralizedAlpha::GeneralizedAlpha(const unsigned T, const double AF, const double AM) - : Integrator(T) + : ImplicitIntegrator(T) , alpha_f(std::min(.5, std::max(AF, .0))) , alpha_m(std::min(alpha_f, std::max(AM, -1.))) , gamma(.5 - alpha_m + alpha_f) @@ -45,7 +44,7 @@ GeneralizedAlpha::GeneralizedAlpha(const unsigned T, const double AF, const doub , F9(-.5 / beta) { if(!suanpan::approx_equal(alpha_m, AM) || !suanpan::approx_equal(alpha_f, AF)) suanpan_error("GeneralizedAlpha() parameters are not acceptable hence automatically adjusted.\n"); } void GeneralizedAlpha::assemble_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_resistance(); }); @@ -56,11 +55,11 @@ void GeneralizedAlpha::assemble_resistance() { fb.get(); fc.get(); - W->set_sushi(F1 * (W->get_current_resistance() + W->get_current_damping_force()) + F2 * (W->get_trial_resistance() + W->get_trial_damping_force()) + F3 * W->get_current_inertial_force() + F4 * W->get_trial_inertial_force()); + W->set_sushi(W->get_current_resistance() + F2 * W->get_incre_resistance() + W->get_current_damping_force() + F2 * W->get_incre_damping_force() + W->get_current_inertial_force() + F4 * W->get_incre_inertial_force()); } void GeneralizedAlpha::assemble_matrix() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_trial_stiffness(); }); @@ -73,16 +72,17 @@ void GeneralizedAlpha::assemble_matrix() { fc.get(); fd.get(); - auto& t_stiffness = W->get_stiffness(); + W->get_stiffness() += W->get_geometry() + F5 / F2 * W->get_mass() + F6 / F2 * W->get_damping(); +} - t_stiffness += W->get_geometry(); +vec GeneralizedAlpha::get_force_residual() { return ImplicitIntegrator::get_force_residual() / F2; } - t_stiffness *= F2; - t_stiffness += F5 * W->get_mass() + F6 * W->get_damping(); -} +vec GeneralizedAlpha::get_displacement_residual() { return ImplicitIntegrator::get_displacement_residual() / F2; } + +sp_mat GeneralizedAlpha::get_reference_load() { return ImplicitIntegrator::get_reference_load() / F2; } int GeneralizedAlpha::process_load() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -90,7 +90,7 @@ int GeneralizedAlpha::process_load() { W->update_trial_time(F1 * current_time + F2 * trial_time); - const auto code = Integrator::process_load(); + const auto code = ImplicitIntegrator::process_load(); W->update_trial_time(trial_time); @@ -98,7 +98,7 @@ int GeneralizedAlpha::process_load() { } int GeneralizedAlpha::process_constraint() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -106,7 +106,7 @@ int GeneralizedAlpha::process_constraint() { W->update_trial_time(F1 * current_time + F2 * trial_time); - const auto code = Integrator::process_constraint(); + const auto code = ImplicitIntegrator::process_constraint(); W->update_trial_time(trial_time); @@ -114,7 +114,7 @@ int GeneralizedAlpha::process_constraint() { } int GeneralizedAlpha::process_load_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -122,7 +122,7 @@ int GeneralizedAlpha::process_load_resistance() { W->update_trial_time(F1 * current_time + F2 * trial_time); - const auto code = Integrator::process_load_resistance(); + const auto code = ImplicitIntegrator::process_load_resistance(); W->update_trial_time(trial_time); @@ -130,7 +130,7 @@ int GeneralizedAlpha::process_load_resistance() { } int GeneralizedAlpha::process_constraint_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); const sp_d auto current_time = W->get_current_time(); @@ -138,7 +138,7 @@ int GeneralizedAlpha::process_constraint_resistance() { W->update_trial_time(F1 * current_time + F2 * trial_time); - const auto code = Integrator::process_constraint_resistance(); + const auto code = ImplicitIntegrator::process_constraint_resistance(); W->update_trial_time(trial_time); @@ -146,7 +146,7 @@ int GeneralizedAlpha::process_constraint_resistance() { } int GeneralizedAlpha::update_trial_status() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); W->update_incre_acceleration(F7 * W->get_incre_displacement() + F8 * W->get_current_velocity() + F9 * W->get_current_acceleration()); @@ -166,31 +166,14 @@ void GeneralizedAlpha::update_parameter(const double NT) { F5 = F4 * F7; } -/** - * \brief update acceleration and velocity for zero displacement increment - */ -void GeneralizedAlpha::update_compatibility() const { - const auto& D = get_domain().lock(); - auto& W = D->get_factory(); - - W->update_incre_acceleration(F8 * W->get_current_velocity() + F9 * W->get_current_acceleration()); - W->update_incre_velocity(F10 * W->get_current_acceleration() + F11 * W->get_incre_acceleration()); - - auto& trial_dsp = W->get_trial_displacement(); - auto& trial_vel = W->get_trial_velocity(); - auto& trial_acc = W->get_trial_acceleration(); - - suanpan::for_all(D->get_node_pool(), [&](const shared_ptr& t_node) { t_node->update_trial_status(trial_dsp, trial_vel, trial_acc); }); -} - vec GeneralizedAlpha::from_incre_velocity(const vec& incre_velocity, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); return incre_velocity / (F11 * F7) + F10 * W->get_current_velocity()(encoding) - (F10 + F11 * F9) / (F11 * F7) * W->get_current_acceleration()(encoding) + W->get_current_displacement()(encoding); } vec GeneralizedAlpha::from_incre_acceleration(const vec& incre_acceleration, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); return incre_acceleration / F7 + F10 * W->get_current_velocity()(encoding) - F9 / F7 * W->get_current_acceleration()(encoding) + W->get_current_displacement()(encoding); } diff --git a/Solver/Integrator/GeneralizedAlpha.h b/Solver/Integrator/GeneralizedAlpha.h index 3ee8b48ce..08fb1aca9 100644 --- a/Solver/Integrator/GeneralizedAlpha.h +++ b/Solver/Integrator/GeneralizedAlpha.h @@ -39,7 +39,7 @@ #include "Integrator.h" -class GeneralizedAlpha final : public Integrator { +class GeneralizedAlpha final : public ImplicitIntegrator { const double alpha_f; const double alpha_m; const double gamma; @@ -56,6 +56,10 @@ class GeneralizedAlpha final : public Integrator { void assemble_resistance() override; void assemble_matrix() override; + vec get_force_residual() override; + vec get_displacement_residual() override; + sp_mat get_reference_load() override; + [[nodiscard]] int process_load() override; [[nodiscard]] int process_constraint() override; [[nodiscard]] int process_load_resistance() override; @@ -64,7 +68,6 @@ class GeneralizedAlpha final : public Integrator { int update_trial_status() override; void update_parameter(double) override; - void update_compatibility() const override; vec from_incre_velocity(const vec&, const uvec&) override; vec from_incre_acceleration(const vec&, const uvec&) override; diff --git a/Solver/Integrator/GeneralizedAlpha.tex b/Solver/Integrator/GeneralizedAlpha.tex index 8bb4a92b3..04e5ea835 100644 --- a/Solver/Integrator/GeneralizedAlpha.tex +++ b/Solver/Integrator/GeneralizedAlpha.tex @@ -1,77 +1,51 @@ \documentclass[a4paper,10pt,fleqn]{article} \usepackage[margin=20mm]{geometry} \usepackage{mathpazo,amsmath,amsfonts,amssymb} +\newcommand{\ddfrac}[2]{\dfrac{\mathrm{d}~#1}{\mathrm{d}~#2}} \begin{document} +\title{Generalised-$\alpha$}\date{} +\maketitle +\section{Integration} The generalized alpha method assumes that the displacement $d$ and the velocity $v$ are integrated as such, \begin{gather}\label{EQ1} d_{n+1}=d_n+\Delta{}tv_n+\Delta{}t^2\left(\left(\dfrac{1}{2}-\beta\right)a_n+\beta{}a_{n+1}\right),\\ v_{n+1}=v_n+\Delta{}t\left(1-\gamma\right)a_n+\Delta{}t\gamma{}a_{n+1}, \end{gather} -where $\beta$ and $\gamma$ are two parameters. From \eqref{EQ1}, the acceleration $a_{n+1}$ can be expressed as +where $\beta$ and $\gamma$ are two parameters. The integration relationship is identical to that of Newmark emthod. +\section{Updating} +From \eqref{EQ1}, the acceleration $a_{n+1}$ can be expressed as \begin{gather} a_{n+1}=\dfrac{1}{\beta\Delta{}t^2}d_{n+1}-\dfrac{1}{\beta\Delta{}t^2}d_n-\dfrac{1}{\beta\Delta{}t}v_n+\left(1-\dfrac{1}{2\beta}\right)a_n. \end{gather} - +Thus, according to chain rule +\begin{gather} +\ddfrac{a_{n+1}}{d_{n+1}}=\dfrac{1}{\beta\Delta{}t^2},\qquad{}\ddfrac{v_{n+1}}{d_{n+1}}=\dfrac{\gamma}{\beta\Delta{}t}. +\end{gather} +\section{EOM} The EOM is expressed at somewhere between $t_n$ and $t_{n+1}$. \begin{gather} -Ma_{n+1-\alpha_m}+Cv_{n+1-\alpha_f}+Kd_{n+1-\alpha_f}=F, +Ma_{n+1-\alpha_m}+Cv_{n+1-\alpha_f}+Kd_{n+1-\alpha_f}=F_{n+1-\alpha_f}, \end{gather} which can also be explicitly shown as \begin{gather} -M\left(\left(1-\alpha_m\right)a_{n+1}+\alpha_ma_n\right)+C\left(\left(1-\alpha_f\right)v_{n+1}+\alpha_fv_n\right)+K\left(\left(1-\alpha_f\right)d_{n+1}+\alpha_fd_n\right)=F, +\begin{split} +M\left(\left(1-\alpha_m\right)a_{n+1}+\alpha_ma_n\right)+C\left(\left(1-\alpha_f\right)v_{n+1}+\alpha_fv_n\right)+K\left(\left(1-\alpha_f\right)d_{n+1}+\alpha_fd_n\right)\\=\left(1-\alpha_f\right)F_{n+1}+\alpha_fF_n, +\end{split} \end{gather} where $\alpha_m$ and $\alpha_f$ are two additional parameters. - -By substituting $v_{n+1}$ and $a_{n+1}$ into the EOM, one arrives -\begin{multline} -M\left(1-\alpha_m\right)\left(\dfrac{1}{\beta\Delta{}t^2}d_{n+1}-\dfrac{1}{\beta\Delta{}t^2}d_n-\dfrac{1}{\beta\Delta{}t}v_n+\left(1-\dfrac{1}{2\beta}\right)a_n\right)+\\C\left(1-\alpha_f\right)\Delta{}t\gamma{}\left(\dfrac{1}{\beta\Delta{}t^2}d_{n+1}-\dfrac{1}{\beta\Delta{}t^2}d_n-\dfrac{1}{\beta\Delta{}t}v_n+\left(1-\dfrac{1}{2\beta}\right)a_n\right)+\\ -C\left(1-\alpha_f\right)\left(v_n+\Delta{}t\left(1-\gamma\right)a_n\right)+K\left(1-\alpha_f\right)d_{n+1}+M\alpha_ma_n+C\alpha_fv_n+K\alpha_fd_n=F. -\end{multline} - -After some rearrangements, the EOM can be expressed as -\begin{multline} -\left(\left(1-\alpha_f\right)K+\dfrac{1-\alpha_m}{\beta\Delta{}t^2}M+\left(1-\alpha_f\right)\dfrac{\gamma{}}{\beta\Delta{}t}C\right)d_{n+1}\\ -=F+M\left(\dfrac{1-\alpha_m}{\beta\Delta{}t^2}d_n+\dfrac{1-\alpha_m}{\beta\Delta{}t}v_n+\left(\dfrac{1-\alpha_m}{2\beta}-1\right)a_n\right)-K\alpha_fd_n\\ -+C\left(\left(1-\alpha_f\right)\dfrac{\gamma{}}{\beta\Delta{}t}d_n+\left(\gamma\dfrac{1-\alpha_f}{\beta}-1\right)v_n+\left(1-\alpha_f\right)\Delta{}t\left(\dfrac{\gamma}{2\beta}-1\right)a_n\right) -\end{multline} - -By denoting some constants as -\begin{gather*} -C_0=\dfrac{1-\alpha_m}{\beta\Delta{}t^2},\quad -C_1=\left(1-\alpha_f\right)\dfrac{\gamma{}}{\beta\Delta{}t},\quad -C_2=\dfrac{1-\alpha_m}{\beta\Delta{}t},\quad -C_3=\dfrac{1-\alpha_m}{2\beta}-1,\quad -C_4=\gamma\dfrac{1-\alpha_f}{\beta}-1,\\ -C_5=\left(1-\alpha_f\right)\Delta{}t\left(\dfrac{\gamma}{2\beta}-1\right),\quad -C_6=\Delta{}t-\Delta{}t\gamma,\quad -C_7=\Delta{}t\gamma{},\quad -C_8=1-\alpha_f,\quad -C_9=\alpha_f,\\ -C_{10}=\dfrac{1}{\beta\Delta{}t^2},\quad -C_{11}=-\dfrac{1}{\beta\Delta{}t},\quad -C_{12}=1-\dfrac{1}{2\beta}, -\end{gather*} -the EOM can be simplified as -\begin{gather} -\left(C_8K+C_0M+C_1C\right)d_{n+1}=F-KC_9d_n+M\left(C_0d_n+C_2v_n+C_3a_n\right)+C\left(C_1d_n+C_4v_n+C_5a_n\right). -\end{gather} - -Hence the equivalent stiffness and resistance can be written as -\begin{gather} -\bar{K}=C_8K+C_0M+C_1C,\\ -\bar{R}=F+M\left(C_0d_n+C_2v_n+C_3a_n\right)+C\left(C_1d_n+C_4v_n+C_5a_n\right)-KC_9d_n, -\end{gather} -so that +\section{Effective Stiffness} +The effective stiffness can be computed as \begin{gather} -\bar{K}d_{n+1}=\bar{R}. +\bar{K}=\left(1-\alpha_m\right)\dfrac{1}{\beta\Delta{}t^2}M+\left(1-\alpha_f\right)\dfrac{\gamma}{\beta\Delta{}t}C+\left(1-\alpha_f\right)K, \end{gather} - -After solving the displacement $d_{n+1}$, the acceleration $a_{n+1}$ can be recovered as +with $\Delta{}d=d_{n+1}-d_n$ be the unknown variable. Once converges, \begin{gather} -a_{n+1}=C_{10}d_{n+1}-C_{10}d_n+C_{11}v_n+C_{12}a_n, +a_{n+1}=\dfrac{1}{\beta\Delta{}t^2}\Delta{}d-\dfrac{1}{\beta\Delta{}t}v_n+\left(1-\dfrac{1}{2\beta}\right)a_n,\\ +v_{n+1}=v_n+\Delta{}t\left(1-\gamma\right)a_n+\Delta{}t\gamma{}a_{n+1}. \end{gather} -and further the velocity $v_{n+1}$ +Or, \begin{gather} -v_{n+1}=v_n+C_6a_n+C_7a_{n+1}. +\Delta{}a=\dfrac{1}{\beta\Delta{}t^2}\Delta{}d-\dfrac{1}{\beta\Delta{}t}v_n-\dfrac{1}{2\beta}a_n,\\ +\Delta{}v=\Delta{}ta_n+\Delta{}t\gamma{}\Delta{}a. \end{gather} -\end{document} +\end{document} \ No newline at end of file diff --git a/Solver/Integrator/GeneralizedAlphaExplicit.cpp b/Solver/Integrator/GeneralizedAlphaExplicit.cpp new file mode 100644 index 000000000..66fee4a74 --- /dev/null +++ b/Solver/Integrator/GeneralizedAlphaExplicit.cpp @@ -0,0 +1,138 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "GeneralizedAlphaExplicit.h" +#include +#include + +GeneralizedAlphaExplicit::GeneralizedAlphaExplicit(const unsigned T, const double R) + : ExplicitIntegrator(T) + , B((R * R - 5. * R + 10) / 6. / (R - 2.) / (R + 1.)) + , AM((2. * R - 1) / (1. + R)) + , AF(AM - .5) {} + +bool GeneralizedAlphaExplicit::has_corrector() const { return true; } + +void GeneralizedAlphaExplicit::assemble_resistance() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + auto fa = std::async([&] { D->assemble_resistance(); }); + auto fb = std::async([&] { D->assemble_damping_force(); }); + auto fc = std::async([&] { D->assemble_inertial_force(); }); + + fa.get(); + fb.get(); + fc.get(); + + W->set_sushi(W->get_trial_resistance() - AF * W->get_incre_resistance() + W->get_trial_damping_force() - AF * W->get_incre_damping_force() + W->get_trial_inertial_force() - AM * W->get_incre_inertial_force()); +} + +void GeneralizedAlphaExplicit::assemble_matrix() { get_domain()->assemble_trial_mass(); } + +vec GeneralizedAlphaExplicit::get_force_residual() { return ExplicitIntegrator::get_force_residual() / (1. - AM); } + +vec GeneralizedAlphaExplicit::get_displacement_residual() { return ExplicitIntegrator::get_displacement_residual() / (1. - AM); } + +sp_mat GeneralizedAlphaExplicit::get_reference_load() { return ExplicitIntegrator::get_reference_load() / (1. - AM); } + +int GeneralizedAlphaExplicit::process_load() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + const sp_d auto current_time = W->get_current_time(); + const sp_d auto trial_time = W->get_trial_time(); + + W->update_trial_time(AF * current_time + (1. - AF) * trial_time); + + const auto code = ExplicitIntegrator::process_load(); + + W->update_trial_time(trial_time); + + return code; +} + +int GeneralizedAlphaExplicit::process_constraint() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + const sp_d auto current_time = W->get_current_time(); + const sp_d auto trial_time = W->get_trial_time(); + + W->update_trial_time(AF * current_time + (1. - AF) * trial_time); + + const auto code = ExplicitIntegrator::process_constraint(); + + W->update_trial_time(trial_time); + + return code; +} + +int GeneralizedAlphaExplicit::process_load_resistance() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + const sp_d auto current_time = W->get_current_time(); + const sp_d auto trial_time = W->get_trial_time(); + + W->update_trial_time(AF * current_time + (1. - AF) * trial_time); + + const auto code = ExplicitIntegrator::process_load_resistance(); + + W->update_trial_time(trial_time); + + return code; +} + +int GeneralizedAlphaExplicit::process_constraint_resistance() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + const sp_d auto current_time = W->get_current_time(); + const sp_d auto trial_time = W->get_trial_time(); + + W->update_trial_time(AF * current_time + (1. - AF) * trial_time); + + const auto code = ExplicitIntegrator::process_constraint_resistance(); + + W->update_trial_time(trial_time); + + return code; +} + +int GeneralizedAlphaExplicit::update_trial_status() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + W->update_incre_displacement(DT * W->get_current_velocity() + (.5 - B) * DT * DT * W->get_current_acceleration()); + W->update_incre_velocity(DT * W->get_current_acceleration()); + + return D->update_trial_status(); +} + +int GeneralizedAlphaExplicit::correct_trial_status() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + W->update_trial_displacement_by(B * DT * DT * W->get_trial_acceleration()); + + return D->update_trial_status(); +} + +void GeneralizedAlphaExplicit::update_parameter(const double NT) { DT = NT; } + +void GeneralizedAlphaExplicit::print() { suanpan_info("An explicit GeneralizedAlpha solver.\n"); } diff --git a/Solver/Integrator/GeneralizedAlphaExplicit.h b/Solver/Integrator/GeneralizedAlphaExplicit.h new file mode 100644 index 000000000..d94751033 --- /dev/null +++ b/Solver/Integrator/GeneralizedAlphaExplicit.h @@ -0,0 +1,65 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class GeneralizedAlphaExplicit + * @brief A GeneralizedAlphaExplicit class defines a solver using GeneralizedAlphaExplicit algorithm. + * + * @author tlc + * @date 03/12/2022 + * @version 0.1.0 + * @file GeneralizedAlphaExplicit.h + * @addtogroup Integrator + * @{ + */ + +#ifndef GENERALIZEDALPHAEXPLICIT_H +#define GENERALIZEDALPHAEXPLICIT_H + +#include "Integrator.h" + +class GeneralizedAlphaExplicit final : public ExplicitIntegrator { + const double B, AM, AF; + double DT{0.}; + +public: + GeneralizedAlphaExplicit(unsigned, double); + + [[nodiscard]] bool has_corrector() const override; + + void assemble_resistance() override; + void assemble_matrix() override; + + vec get_force_residual() override; + vec get_displacement_residual() override; + sp_mat get_reference_load() override; + + [[nodiscard]] int process_load() override; + [[nodiscard]] int process_constraint() override; + [[nodiscard]] int process_load_resistance() override; + [[nodiscard]] int process_constraint_resistance() override; + + int update_trial_status() override; + int correct_trial_status() override; + + void update_parameter(double) override; + + void print() override; +}; + +#endif + +//! @} diff --git a/Solver/Integrator/Integrator b/Solver/Integrator/Integrator index 7b125cbfe..7908516f5 100644 --- a/Solver/Integrator/Integrator +++ b/Solver/Integrator/Integrator @@ -1,11 +1,15 @@ #include "Integrator.h" +#include "BatheExplicit.h" #include "BatheTwoStep.h" -#include "GSSSS.h" #include "GeneralizedAlpha.h" +#include "GeneralizedAlphaExplicit.h" +#include "GSSSS.h" #include "LeeNewmark.h" #include "LeeNewmarkBase.h" #include "LeeNewmarkFull.h" #include "Newmark.h" +#include "OALTS.h" #include "RayleighNewmark.h" +#include "Tchamwa.h" #include "WilsonPenzienNewmark.h" \ No newline at end of file diff --git a/Solver/Integrator/Integrator.cpp b/Solver/Integrator/Integrator.cpp index c29fd791c..50f47e88e 100644 --- a/Solver/Integrator/Integrator.cpp +++ b/Solver/Integrator/Integrator.cpp @@ -17,7 +17,7 @@ #include "Integrator.h" #include -#include +#include Integrator::Integrator(const unsigned T) : Tag(T) { suanpan_debug("Integrator %u ctor() called.\n", T); } @@ -26,7 +26,7 @@ Integrator::~Integrator() { suanpan_debug("Integrator %u dtor() called.\n", get_ void Integrator::set_domain(const weak_ptr& D) { if(database.lock() != D.lock()) database = D; } -const weak_ptr& Integrator::get_domain() const { return database; } +shared_ptr Integrator::get_domain() const { return database.lock(); } int Integrator::initialize() { if(nullptr != database.lock()) return SUANPAN_SUCCESS; @@ -44,6 +44,14 @@ void Integrator::set_time_step_switch(const bool T) { time_step_switch = T; } */ bool Integrator::allow_to_change_time_step() const { return time_step_switch; } +void Integrator::set_matrix_assembled_switch(const bool T) { matrix_assembled_switch = T; } + +bool Integrator::matrix_is_assembled() const { return matrix_assembled_switch; } + +bool Integrator::has_corrector() const { return false; } + +bool Integrator::time_independent_matrix() const { return true; } + int Integrator::process_load() { return database.lock()->process_load(true); } /** @@ -54,7 +62,7 @@ int Integrator::process_load() { return database.lock()->process_load(true); } */ int Integrator::process_constraint() { const auto& D = database.lock(); - const auto& W = D->get_factory(); + auto& W = D->get_factory(); const auto code = D->process_constraint(true); @@ -70,7 +78,7 @@ int Integrator::process_criterion() { return database.lock()->process_criterion( int Integrator::process_modifier() { return database.lock()->process_modifier(); } -int Integrator::process_load_resistance() { return SUANPAN_SUCCESS; } +int Integrator::process_load_resistance() { return database.lock()->process_load(false); } /** * This method is similar to process_constraint(), but it only updates the global residual vector. @@ -81,7 +89,7 @@ int Integrator::process_load_resistance() { return SUANPAN_SUCCESS; } */ int Integrator::process_constraint_resistance() { const auto& D = database.lock(); - const auto& W = D->get_factory(); + auto& W = D->get_factory(); const auto code = D->process_constraint(false); @@ -94,7 +102,7 @@ void Integrator::record() const { database.lock()->record(); } void Integrator::assemble_resistance() { const auto& D = database.lock(); - const auto& W = D->get_factory(); + auto& W = D->get_factory(); D->assemble_resistance(); W->set_sushi(W->get_trial_resistance()); } @@ -116,7 +124,7 @@ void Integrator::assemble_matrix() { */ vec Integrator::get_force_residual() { const auto& D = database.lock(); - const auto& W = D->get_factory(); + auto& W = D->get_factory(); vec residual = W->get_trial_load() - W->get_sushi(); @@ -131,7 +139,7 @@ vec Integrator::get_force_residual() { */ vec Integrator::get_displacement_residual() { const auto& D = database.lock(); - const auto& W = D->get_factory(); + auto& W = D->get_factory(); vec residual = W->get_reference_load() * W->get_trial_load_factor() + W->get_trial_load() - W->get_sushi(); @@ -144,14 +152,14 @@ vec Integrator::get_displacement_residual() { * Assemble the global residual vector due to nonlinear constraints implemented via the multiplier method. */ vec Integrator::get_auxiliary_residual() { - const auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); return W->get_auxiliary_load() - W->get_auxiliary_resistance(); } sp_mat Integrator::get_reference_load() { return database.lock()->get_factory()->get_reference_load(); } -sp_mat Integrator::get_auxiliary_stiffness() { return database.lock()->get_factory()->get_auxiliary_stiffness(); } +const vec& Integrator::get_trial_displacement() const { return database.lock()->get_factory()->get_trial_displacement(); } void Integrator::update_load() { database.lock()->update_load(); } @@ -160,35 +168,75 @@ void Integrator::update_constraint() { database.lock()->update_constraint(); } void Integrator::update_trial_load_factor(const double lambda) { update_trial_load_factor(vec{lambda}); } void Integrator::update_trial_load_factor(const vec& lambda) { - const auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); W->update_trial_load_factor_by(lambda); } -void Integrator::update_trial_displacement(const vec& ninja) { - const auto& W = get_domain().lock()->get_factory(); - W->update_trial_displacement_by(ninja); +void Integrator::update_from_ninja() { + auto& W = get_domain()->get_factory(); + W->update_trial_displacement_by(W->get_ninja()); } void Integrator::update_trial_time(const double T) { - const auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); W->update_trial_time(T); update_parameter(W->get_incre_time()); } void Integrator::update_incre_time(const double T) { - const auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); W->update_incre_time(T); update_parameter(W->get_incre_time()); } -int Integrator::update_trial_status() { return database.lock()->update_trial_status(); } +int Integrator::update_trial_status() { + const auto& D = get_domain(); + auto& W = D->get_factory(); -int Integrator::update_incre_status() { return database.lock()->update_incre_status(); } + return suanpan::approx_equal(norm(W->get_incre_displacement()), 0.) ? SUANPAN_SUCCESS : D->update_trial_status(); +} + +int Integrator::correct_trial_status() { return SUANPAN_SUCCESS; } + +/** + * When a new displacement increment is computed, it is added to global displacement vector. + * At this moment, nodal and elemental quantities are all computed from the previous displacement + * vector, directly committing the new results causes out-of-sync issue. + * Some algorithms use predictor-corrector type scheme, which means the converged quantities are + * different from the committed quantities. + * This method is in charge of syncing quantities between global and local quantities by updating + * nodal/elemental quantities using the committed quantities. + */ +int Integrator::sync_status(const bool only_correct) { + auto handle_force = [&] { + // process modifiers + if(SUANPAN_SUCCESS != process_modifier()) return SUANPAN_FAIL; + // assemble resistance + assemble_resistance(); + return SUANPAN_SUCCESS; + }; + + // only perform corrector if defined + if(only_correct) { + if(!has_corrector()) return SUANPAN_SUCCESS; + + if(SUANPAN_SUCCESS != correct_trial_status()) return SUANPAN_FAIL; + + return handle_force(); + } + + // perform corrector/predictor depending on the algorithm + if(SUANPAN_SUCCESS != (has_corrector() ? correct_trial_status() : update_trial_status())) return SUANPAN_FAIL; + + return handle_force(); +} /** - * Must change ninja to the real displacement increment. + * Some algorithms solve a system which differs from the original one. + * The size of the problem changes thus the computed increment contains additional internal + * quantities. This methods updates internal quantities stored in those integrators. */ -int Integrator::update_internal(const mat&) { return 0; } +int Integrator::update_internal(const mat&) { return SUANPAN_SUCCESS; } mat Integrator::solve(const mat& B) { mat X; @@ -227,7 +275,13 @@ int Integrator::solve(mat& X, sp_mat&& B) { return database.lock()->get_factory( * The penalty method can apply homogeneous constraints approximately. * The corresponding DoF shall be set to zero after solving the system. */ -void Integrator::erase_machine_error() const { database.lock()->erase_machine_error(); } +void Integrator::erase_machine_error(vec& ninja) const { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + D->erase_machine_error(ninja); + get_ninja(W) = ninja.head(W->get_size()); +} void Integrator::stage_and_commit_status() { stage_status(); @@ -236,44 +290,27 @@ void Integrator::stage_and_commit_status() { void Integrator::stage_status() { database.lock()->stage_status(); } -void Integrator::commit_status() { - database.lock()->commit_status(); - update_compatibility(); -} +void Integrator::commit_status() { database.lock()->commit_status(); } void Integrator::clear_status() { + matrix_assembled_switch = false; database.lock()->clear_status(); - update_compatibility(); } -void Integrator::reset_status() { - database.lock()->reset_status(); - update_compatibility(); -} +void Integrator::reset_status() { database.lock()->reset_status(); } /** - * When tim step changes, some parameters may need to be updated. + * When time step changes, some parameters may need to be updated. */ void Integrator::update_parameter(double) {} -/** - * Make sure that the trial displacement/velocity/acceleration are consistent with each other. - * When starting a new trial state, the trial displacement is identical to the current displacement. - * This essentially means that the displacement increment is zero. - * To have such a trial state with the given time step, the trial velocity and acceleration shall be - * updated to be compatible with the trial displacement. - * - * On exit, trial velocity and acceleration should be computed from current/trial displacement. - */ -void Integrator::update_compatibility() const {} - /** * When external loads are applied, they can be applied in forms of displacement/velocity/acceleration. * The time integration methods, by default, form effective stiffness matrices in displacement domain. * That is, in AX=B, A is the effective stiffness matrix and X is the displacement increment. * Thus, loads in velocity/acceleration must be converted to displacement. * This cannot be done arbitrarily due to compatibility issues. - * This method takes velocity increment and converts it to displacement increment. + * This method takes velocity increment and converts it to TOTAL displacement. */ vec Integrator::from_incre_velocity(const vec&, const uvec& encoding) { return zeros(encoding.n_elem); } @@ -283,12 +320,12 @@ vec Integrator::from_incre_velocity(const vec&, const uvec& encoding) { return z * That is, in AX=B, A is the effective stiffness matrix and X is the displacement increment. * Thus, loads in velocity/acceleration must be converted to displacement. * This cannot be done arbitrarily due to compatibility issues. - * This method takes acceleration increment and converts it to displacement increment. + * This method takes acceleration increment and converts it to TOTAL displacement. */ vec Integrator::from_incre_acceleration(const vec&, const uvec& encoding) { return zeros(encoding.n_elem); } vec Integrator::from_total_velocity(const vec& total_velocity, const uvec& encoding) { - const auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); if(AnalysisType::DYNAMICS != W->get_analysis_type()) return zeros(encoding.n_elem); @@ -296,7 +333,7 @@ vec Integrator::from_total_velocity(const vec& total_velocity, const uvec& encod } vec Integrator::from_total_acceleration(const vec& total_acceleration, const uvec& encoding) { - const auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); if(AnalysisType::DYNAMICS != W->get_analysis_type()) return zeros(encoding.n_elem); @@ -318,3 +355,26 @@ vec Integrator::from_incre_acceleration(const double magnitude, const uvec& enco vec Integrator::from_total_velocity(const double magnitude, const uvec& encoding) { return from_total_velocity(vec(encoding.n_elem, fill::value(magnitude)), encoding); } vec Integrator::from_total_acceleration(const double magnitude, const uvec& encoding) { return from_total_acceleration(vec(encoding.n_elem, fill::value(magnitude)), encoding); } + +bool ImplicitIntegrator::time_independent_matrix() const { return false; } + +const vec& ExplicitIntegrator::get_trial_displacement() const { return get_domain()->get_factory()->get_trial_acceleration(); } + +void ExplicitIntegrator::update_from_ninja() { + const auto& W = get_domain()->get_factory(); + W->update_trial_acceleration_by(W->get_ninja()); +} + +int ExplicitIntegrator::solve(mat& X, const mat& B) { return get_domain()->get_factory()->get_mass()->solve(X, B); } + +int ExplicitIntegrator::solve(mat& X, const sp_mat& B) { return get_domain()->get_factory()->get_mass()->solve(X, B); } + +int ExplicitIntegrator::solve(mat& X, mat&& B) { return get_domain()->get_factory()->get_mass()->solve(X, std::forward(B)); } + +int ExplicitIntegrator::solve(mat& X, sp_mat&& B) { return get_domain()->get_factory()->get_mass()->solve(X, std::forward(B)); } + +vec ExplicitIntegrator::from_incre_velocity(const vec&, const uvec&) { throw invalid_argument("support velocity cannot be used with explicit integrator"); } + +vec ExplicitIntegrator::from_incre_acceleration(const vec& incre_acceleration, const uvec& encoding) { return get_domain()->get_factory()->get_current_acceleration()(encoding) + incre_acceleration; } + +vec ExplicitIntegrator::from_total_acceleration(const vec& total_acceleration, const uvec&) { return total_acceleration; } diff --git a/Solver/Integrator/Integrator.h b/Solver/Integrator/Integrator.h index 871a048fd..7ec9ffb84 100644 --- a/Solver/Integrator/Integrator.h +++ b/Solver/Integrator/Integrator.h @@ -43,8 +43,14 @@ class DomainBase; +enum class IntegratorType { + Implicit, + Explicit +}; + class Integrator : public Tag { bool time_step_switch = true; + bool matrix_assembled_switch = false; weak_ptr database; @@ -57,14 +63,23 @@ class Integrator : public Tag { ~Integrator() override; void set_domain(const weak_ptr&); - [[nodiscard]] const weak_ptr& get_domain() const; + [[nodiscard]] shared_ptr get_domain() const; virtual int initialize(); + [[nodiscard]] virtual constexpr IntegratorType type() const { return IntegratorType::Implicit; } + // ! some multistep integrators may require fixed time step for some consecutive sub-steps void set_time_step_switch(bool); [[nodiscard]] bool allow_to_change_time_step() const; + // ! manually set switch after assembling global matrix + void set_matrix_assembled_switch(bool); + [[nodiscard]] bool matrix_is_assembled() const; + + [[nodiscard]] virtual bool has_corrector() const; + [[nodiscard]] virtual bool time_independent_matrix() const; + [[nodiscard]] virtual int process_load(); [[nodiscard]] virtual int process_constraint(); [[nodiscard]] virtual int process_criterion(); @@ -81,19 +96,23 @@ class Integrator : public Tag { virtual vec get_displacement_residual(); virtual vec get_auxiliary_residual(); virtual sp_mat get_reference_load(); - virtual sp_mat get_auxiliary_stiffness(); + + [[nodiscard]] virtual const vec& get_trial_displacement() const; virtual void update_load(); virtual void update_constraint(); virtual void update_trial_load_factor(double); virtual void update_trial_load_factor(const vec&); - virtual void update_trial_displacement(const vec&); + virtual void update_from_ninja(); + + virtual void update_trial_time(double); + virtual void update_incre_time(double); - void update_trial_time(double); - void update_incre_time(double); virtual int update_trial_status(); - virtual int update_incre_status(); + virtual int correct_trial_status(); + + virtual int sync_status(bool); virtual int update_internal(const mat&); @@ -106,9 +125,9 @@ class Integrator : public Tag { virtual int solve(mat&, mat&&); virtual int solve(mat&, sp_mat&&); - virtual void erase_machine_error() const; + virtual void erase_machine_error(vec&) const; - virtual void stage_and_commit_status(); + void stage_and_commit_status(); virtual void stage_status(); virtual void commit_status(); @@ -116,18 +135,47 @@ class Integrator : public Tag { virtual void reset_status(); virtual void update_parameter(double); - virtual void update_compatibility() const; virtual vec from_incre_velocity(const vec&, const uvec&); // obtain target displacement from increment of velocity virtual vec from_incre_acceleration(const vec&, const uvec&); // obtain target displacement from increment of acceleration - vec from_total_velocity(const vec&, const uvec&); - vec from_total_acceleration(const vec&, const uvec&); + virtual vec from_total_velocity(const vec&, const uvec&); + virtual vec from_total_acceleration(const vec&, const uvec&); vec from_incre_velocity(double, const uvec&); vec from_incre_acceleration(double, const uvec&); vec from_total_velocity(double, const uvec&); vec from_total_acceleration(double, const uvec&); }; +class ImplicitIntegrator : public Integrator { +public: + using Integrator::Integrator; + + [[nodiscard]] constexpr IntegratorType type() const override { return IntegratorType::Implicit; } + + [[nodiscard]] bool time_independent_matrix() const override; +}; + +class ExplicitIntegrator : public Integrator { +public: + using Integrator::Integrator; + + [[nodiscard]] constexpr IntegratorType type() const override { return IntegratorType::Explicit; } + + [[nodiscard]] const vec& get_trial_displacement() const override; + + void update_from_ninja() override; + + int solve(mat&, const mat&) override; + int solve(mat&, const sp_mat&) override; + int solve(mat&, mat&&) override; + int solve(mat&, sp_mat&&) override; + + vec from_incre_velocity(const vec&, const uvec&) override; + + vec from_incre_acceleration(const vec&, const uvec&) override; // obtain target acceleration from increment of acceleration + vec from_total_acceleration(const vec&, const uvec&) override; +}; + #endif //! @} diff --git a/Solver/Integrator/LeeNewmark.cpp b/Solver/Integrator/LeeNewmark.cpp index c2f899a63..bf8e3bd06 100644 --- a/Solver/Integrator/LeeNewmark.cpp +++ b/Solver/Integrator/LeeNewmark.cpp @@ -98,10 +98,10 @@ int LeeNewmark::initialize() { } int LeeNewmark::process_constraint() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); // process constraint for the first time to obtain proper stiffness - if(SUANPAN_SUCCESS != Integrator::process_constraint()) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != LeeNewmarkBase::process_constraint()) return SUANPAN_FAIL; // this stiffness contains geometry, mass and damping which are handled in Newmark::assemble_matrix() auto& t_stiff = get_stiffness(factory); @@ -114,34 +114,7 @@ int LeeNewmark::process_constraint() { initialize_mass(D); } - else { - // if not first iteration - // erase the tangent stiffness entries - - uword *ptr_a, *ptr_b; - - if(t_triplet.is_csc_sorted()) { - ptr_a = t_triplet.col_mem(); - ptr_b = t_triplet.row_mem(); - } - else if(t_triplet.is_csr_sorted()) { - ptr_a = t_triplet.row_mem(); - ptr_b = t_triplet.col_mem(); - } - else { - suanpan_error("the system is not sorted while entering iteration, please file a bug report.\n"); - return SUANPAN_FAIL; - } - - const auto& val = t_triplet.val_mem(); - - for(uword I = 0; I < t_triplet.n_elem; ++I) { - // quit if current column/row is beyond the original size of matrix - if(ptr_a[I] >= n_block) break; - // erase existing entries if fall in intact stiffness matrix - if(ptr_b[I] < n_block) val[I] = 0.; - } - } + else if(SUANPAN_SUCCESS != erase_top_left_block()) return SUANPAN_FAIL; t_stiff += C1 * CM * current_mass; @@ -156,7 +129,7 @@ int LeeNewmark::process_constraint() { // check in constant terms that does not change in the substep initialize_stiffness(D); - if(SUANPAN_SUCCESS != Integrator::process_constraint()) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != LeeNewmarkBase::process_constraint()) return SUANPAN_FAIL; t_stiff->csc_condense(); current_stiffness.swap(t_stiff); @@ -170,8 +143,14 @@ int LeeNewmark::process_constraint() { return SUANPAN_SUCCESS; } +int LeeNewmark::process_constraint_resistance() { + update_residual(); + + return LeeNewmarkBase::process_constraint_resistance(); +} + void LeeNewmark::assemble_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); const auto& W = factory; D->assemble_resistance(); diff --git a/Solver/Integrator/LeeNewmark.h b/Solver/Integrator/LeeNewmark.h index c13edaf7c..48b8d3b45 100644 --- a/Solver/Integrator/LeeNewmark.h +++ b/Solver/Integrator/LeeNewmark.h @@ -61,6 +61,7 @@ class LeeNewmark : public LeeNewmarkBase { int initialize() override; int process_constraint() override; + int process_constraint_resistance() override; void assemble_resistance() override; diff --git a/Solver/Integrator/LeeNewmarkBase.cpp b/Solver/Integrator/LeeNewmarkBase.cpp index db685ffe7..20a58765a 100644 --- a/Solver/Integrator/LeeNewmarkBase.cpp +++ b/Solver/Integrator/LeeNewmarkBase.cpp @@ -19,6 +19,36 @@ #include #include +int LeeNewmarkBase::erase_top_left_block() const { + auto& t_triplet = stiffness->triplet_mat; + + uword *ptr_a, *ptr_b; + + if(t_triplet.is_csc_sorted()) { + ptr_a = t_triplet.col_mem(); + ptr_b = t_triplet.row_mem(); + } + else if(t_triplet.is_csr_sorted()) { + ptr_a = t_triplet.row_mem(); + ptr_b = t_triplet.col_mem(); + } + else { + suanpan_error("the system is not sorted while entering iteration, please file a bug report.\n"); + return SUANPAN_FAIL; + } + + const auto& val = t_triplet.val_mem(); + + for(uword I = 0; I < t_triplet.n_elem; ++I) { + // quit if current column/row is beyond the original size of matrix + if(ptr_a[I] >= n_block) break; + // erase existing entries if fall in intact stiffness matrix + if(ptr_b[I] < n_block) val[I] = 0.; + } + + return SUANPAN_SUCCESS; +} + LeeNewmarkBase::LeeNewmarkBase(const unsigned T, const double A, const double B, const StiffnessType ST) : Newmark(T, A, B) , n_block(0) @@ -27,7 +57,7 @@ LeeNewmarkBase::LeeNewmarkBase(const unsigned T, const double A, const double B, int LeeNewmarkBase::initialize() { if(Newmark::initialize() != SUANPAN_SUCCESS) return SUANPAN_FAIL; - factory = get_domain().lock()->get_factory(); + factory = get_domain()->get_factory(); access::rw(n_block) = factory->get_size(); @@ -53,10 +83,6 @@ int LeeNewmarkBase::initialize() { int LeeNewmarkBase::update_internal(const mat& t_internal) { trial_internal += t_internal; - // order matters - // cannot resize before assignment - get_ninja(factory).resize(n_block); - return SUANPAN_SUCCESS; } diff --git a/Solver/Integrator/LeeNewmarkBase.h b/Solver/Integrator/LeeNewmarkBase.h index 708611632..d36f6126b 100644 --- a/Solver/Integrator/LeeNewmarkBase.h +++ b/Solver/Integrator/LeeNewmarkBase.h @@ -61,6 +61,8 @@ class LeeNewmarkBase : public Newmark { virtual void update_stiffness() const = 0; virtual void update_residual() const = 0; + int erase_top_left_block() const; + public: explicit LeeNewmarkBase(unsigned, double, double, StiffnessType = StiffnessType::CURRENT); diff --git a/Solver/Integrator/LeeNewmarkFull.cpp b/Solver/Integrator/LeeNewmarkFull.cpp index 024de0369..39949b9dc 100644 --- a/Solver/Integrator/LeeNewmarkFull.cpp +++ b/Solver/Integrator/LeeNewmarkFull.cpp @@ -359,10 +359,10 @@ int LeeNewmarkFull::initialize() { } int LeeNewmarkFull::process_constraint() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); // process constraint for the first time to obtain proper stiffness - if(SUANPAN_SUCCESS != Integrator::process_constraint()) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != LeeNewmarkBase::process_constraint()) return SUANPAN_FAIL; // this stiffness contains geometry, mass and damping from Newmark::assemble_matrix() auto& t_stiff = factory->get_stiffness()->triplet_mat; @@ -461,7 +461,7 @@ int LeeNewmarkFull::process_constraint() { f_mass.get(); // now apply constraints - if(SUANPAN_SUCCESS != Integrator::process_constraint()) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != LeeNewmarkBase::process_constraint()) return SUANPAN_FAIL; t_stiff.csc_condense(); // move original stiffness matrix back @@ -497,30 +497,7 @@ int LeeNewmarkFull::process_constraint() { else { // if not first iteration // erase the tangent stiffness entries - - uword *ptr_a, *ptr_b; - - if(t_triplet.is_csc_sorted()) { - ptr_a = t_triplet.col_mem(); - ptr_b = t_triplet.row_mem(); - } - else if(t_triplet.is_csr_sorted()) { - ptr_a = t_triplet.row_mem(); - ptr_b = t_triplet.col_mem(); - } - else { - suanpan_error("the system is not sorted while entering iteration, please file a bug report.\n"); - return SUANPAN_FAIL; - } - - const auto& val = t_triplet.val_mem(); - - for(uword I = 0; I < t_triplet.n_elem; ++I) { - // quit if current column/row is beyond the original size of matrix - if(ptr_a[I] >= n_block) break; - // erase existing entries if fall in intact stiffness matrix - if(ptr_b[I] < n_block) val[I] = 0.; - } + if(SUANPAN_SUCCESS != erase_top_left_block()) return SUANPAN_FAIL; // check in original nonzero entries in unrolled damping matrix stiffness += rabbit; @@ -533,4 +510,17 @@ int LeeNewmarkFull::process_constraint() { return SUANPAN_SUCCESS; } +int LeeNewmarkFull::process_constraint_resistance() { + if(SUANPAN_SUCCESS != erase_top_left_block()) return SUANPAN_FAIL; + + // check in original nonzero entries in unrolled damping matrix + stiffness += rabbit; + + update_residual(); + + stiffness += factory->get_stiffness()->triplet_mat; + + return LeeNewmarkBase::process_constraint_resistance(); +} + void LeeNewmarkFull::print() { suanpan_info("A Newmark solver using Lee's damping model with adjustable bandwidth using %s stiffness. doi: 10.1016/j.compstruc.2020.106423 and 10.1016/j.compstruc.2021.106663\n", stiffness_type == StiffnessType::TRIAL ? "tangent" : stiffness_type == StiffnessType::CURRENT ? "converged" : "initial"); } diff --git a/Solver/Integrator/LeeNewmarkFull.h b/Solver/Integrator/LeeNewmarkFull.h index e54b38754..00df1aa52 100644 --- a/Solver/Integrator/LeeNewmarkFull.h +++ b/Solver/Integrator/LeeNewmarkFull.h @@ -91,6 +91,7 @@ class LeeNewmarkFull final : public LeeNewmarkBase { int initialize() override; int process_constraint() override; + int process_constraint_resistance() override; void print() override; }; diff --git a/Solver/Integrator/Newmark.cpp b/Solver/Integrator/Newmark.cpp index b6b9d1eab..7c9c0e13d 100644 --- a/Solver/Integrator/Newmark.cpp +++ b/Solver/Integrator/Newmark.cpp @@ -18,15 +18,14 @@ #include "Newmark.h" #include #include -#include Newmark::Newmark(const unsigned T, const double A, const double B) - : Integrator(T) + : ImplicitIntegrator(T) , beta(A) , gamma(B) {} void Newmark::assemble_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_resistance(); }); @@ -41,7 +40,7 @@ void Newmark::assemble_resistance() { } void Newmark::assemble_matrix() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_trial_stiffness(); }); @@ -58,7 +57,7 @@ void Newmark::assemble_matrix() { } int Newmark::update_trial_status() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); W->update_incre_acceleration(C0 * W->get_incre_displacement() - C2 * W->get_current_velocity() - C4 * W->get_current_acceleration()); @@ -67,31 +66,14 @@ int Newmark::update_trial_status() { return D->update_trial_status(); } -/** - * \brief update acceleration and velocity for zero displacement increment - */ -void Newmark::update_compatibility() const { - const auto& D = get_domain().lock(); - auto& W = D->get_factory(); - - W->update_incre_acceleration(-C2 * W->get_current_velocity() - C4 * W->get_current_acceleration()); - W->update_incre_velocity(C5 * W->get_current_acceleration() + C3 * W->get_incre_acceleration()); - - auto& trial_dsp = W->get_trial_displacement(); - auto& trial_vel = W->get_trial_velocity(); - auto& trial_acc = W->get_trial_acceleration(); - - suanpan::for_all(D->get_node_pool(), [&](const shared_ptr& t_node) { t_node->update_trial_status(trial_dsp, trial_vel, trial_acc); }); -} - vec Newmark::from_incre_velocity(const vec& incre_velocity, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); return incre_velocity / C1 + C5 * W->get_current_velocity()(encoding) + (C3 * C4 - C5) / C1 * W->get_current_acceleration()(encoding) + W->get_current_displacement()(encoding); } vec Newmark::from_incre_acceleration(const vec& incre_acceleration, const uvec& encoding) { - auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); return incre_acceleration / C0 + C5 * W->get_current_velocity()(encoding) + C4 / C0 * W->get_current_acceleration()(encoding) + W->get_current_displacement()(encoding); } diff --git a/Solver/Integrator/Newmark.h b/Solver/Integrator/Newmark.h index 016dbbc5a..0674d7565 100644 --- a/Solver/Integrator/Newmark.h +++ b/Solver/Integrator/Newmark.h @@ -42,7 +42,7 @@ #include "Integrator.h" -class Newmark : public Integrator { +class Newmark : public ImplicitIntegrator { const double beta; /**< parameter = .25 */ const double gamma; /**< parameter = .5 */ protected: @@ -56,7 +56,6 @@ class Newmark : public Integrator { int update_trial_status() override; void update_parameter(double) override; - void update_compatibility() const override; vec from_incre_velocity(const vec&, const uvec&) override; vec from_incre_acceleration(const vec&, const uvec&) override; diff --git a/Solver/Integrator/OALTS.cpp b/Solver/Integrator/OALTS.cpp new file mode 100644 index 000000000..a3c1a9b07 --- /dev/null +++ b/Solver/Integrator/OALTS.cpp @@ -0,0 +1,139 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "OALTS.h" +#include +#include + +OALTS::OALTS(const unsigned T, const double R) + : ImplicitIntegrator(T) + , A1((4. * R - 4.) / (3. - R)) + , A2(-1. - A1) + , B0(2. / (1. + R) / (3. - R)) + , B1(2. - 2. * B0 + .5 * A1) + , B2(.5 * A1 + B0) + , B10(B1 / B0) + , B20(B2 / B0) { set_time_step_switch(false); } + +void OALTS::assemble_resistance() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + auto fa = std::async([&] { D->assemble_resistance(); }); + auto fb = std::async([&] { D->assemble_damping_force(); }); + auto fc = std::async([&] { D->assemble_inertial_force(); }); + + fa.get(); + fb.get(); + fc.get(); + + W->set_sushi(W->get_trial_resistance() + W->get_trial_damping_force() + W->get_trial_inertial_force()); +} + +void OALTS::assemble_matrix() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + auto fa = std::async([&] { D->assemble_trial_stiffness(); }); + auto fb = std::async([&] { D->assemble_trial_geometry(); }); + auto fc = std::async([&] { D->assemble_trial_damping(); }); + auto fd = std::async([&] { D->assemble_trial_mass(); }); + + fa.get(); + fb.get(); + fc.get(); + fd.get(); + + if(if_starting) [[unlikely]] W->get_stiffness() += W->get_geometry() + 4. / DT / DT * W->get_mass() + 2. / DT * W->get_damping(); + else [[likely]] W->get_stiffness() += W->get_geometry() + P1 * P1 * W->get_mass() + P1 * W->get_damping(); +} + +int OALTS::update_trial_status() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + if(if_starting) [[unlikely]] + { + W->update_trial_velocity(2. / DT * (W->get_trial_displacement() - W->get_current_displacement()) - W->get_current_velocity()); + W->update_trial_acceleration(2. / DT * (W->get_trial_velocity() - W->get_current_velocity()) - W->get_current_acceleration()); + } + else [[likely]] + { + W->update_trial_velocity(P1 * W->get_trial_displacement() + P2 * W->get_current_displacement() + P3 * W->get_pre_displacement() - B10 * W->get_current_velocity() - B20 * W->get_pre_velocity()); + W->update_trial_acceleration(P1 * W->get_trial_velocity() + P2 * W->get_current_velocity() + P3 * W->get_pre_velocity() - B10 * W->get_current_acceleration() - B20 * W->get_pre_acceleration()); + } + + return D->update_trial_status(); +} + +void OALTS::update_parameter(const double NT) { + if(suanpan::approx_equal(DT, NT)) return; + + DT = NT; + + P1 = 1. / B0 / DT; + P2 = P1 * A1; + P3 = P1 * A2; +} + +void OALTS::commit_status() { + auto& W = get_domain()->get_factory(); + + if_starting = false; + + W->commit_pre_displacement(); + W->commit_pre_velocity(); + W->commit_pre_acceleration(); + + ImplicitIntegrator::commit_status(); +} + +void OALTS::clear_status() { + if_starting = true; + + ImplicitIntegrator::clear_status(); +} + +vec OALTS::from_incre_velocity(const vec& incre_velocity, const uvec& encoding) { + auto& W = get_domain()->get_factory(); + + return from_total_velocity(W->get_current_velocity()(encoding) + incre_velocity, encoding); +} + +vec OALTS::from_incre_acceleration(const vec& incre_acceleration, const uvec& encoding) { + auto& W = get_domain()->get_factory(); + + return from_total_acceleration(W->get_current_acceleration()(encoding) + incre_acceleration, encoding); +} + +vec OALTS::from_total_velocity(const vec& total_velocity, const uvec& encoding) { + auto& W = get_domain()->get_factory(); + + if(if_starting) return .5 * DT * (total_velocity + W->get_current_velocity()(encoding)) + W->get_current_displacement()(encoding); + + return total_velocity / P1 - A1 * W->get_current_displacement()(encoding) - A2 * W->get_pre_displacement()(encoding) + B10 / P1 * W->get_current_velocity()(encoding) + B20 / P1 * W->get_pre_velocity()(encoding); +} + +vec OALTS::from_total_acceleration(const vec& total_acceleration, const uvec& encoding) { + auto& W = get_domain()->get_factory(); + + if(if_starting) return from_total_velocity(.5 * DT * (total_acceleration + W->get_current_acceleration()(encoding)) + W->get_current_velocity()(encoding), encoding); + + return from_total_velocity(total_acceleration / P1 - A1 * W->get_current_velocity()(encoding) - A2 * W->get_pre_velocity()(encoding) + B10 / P1 * W->get_current_acceleration()(encoding) + B20 / P1 * W->get_pre_acceleration()(encoding), encoding); +} + +void OALTS::print() { suanpan_info("A time integrator using the OALTS algorithm.\ndoi:10.1002/nme.6188\n"); } diff --git a/Solver/Integrator/OALTS.h b/Solver/Integrator/OALTS.h new file mode 100644 index 000000000..34eb38ebb --- /dev/null +++ b/Solver/Integrator/OALTS.h @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class OALTS + * @brief A OALTS class defines a solver using OALTS algorithm. + * + * doi: 10.1002/nme.6188 + * + * @author tlc + * @date 05/12/2022 + * @version 0.1.0 + * @file OALTS.h + * @addtogroup Integrator + * @{ + */ + +#ifndef OALTS_H +#define OALTS_H + +#include "Integrator.h" + +class OALTS final : public ImplicitIntegrator { + const double A1, A2, B0, B1, B2, B10, B20; + + double DT{0.}, P1{0.}, P2{0.}, P3{0.}; + + bool if_starting = true; + +public: + OALTS(unsigned, double); + + void assemble_resistance() override; + void assemble_matrix() override; + + int update_trial_status() override; + + void update_parameter(double) override; + + void commit_status() override; + void clear_status() override; + + vec from_incre_velocity(const vec&, const uvec&) override; + vec from_incre_acceleration(const vec&, const uvec&) override; + vec from_total_velocity(const vec&, const uvec&) override; + vec from_total_acceleration(const vec&, const uvec&) override; + + void print() override; +}; + +#endif + +//! @} diff --git a/Solver/Integrator/RayleighNewmark.cpp b/Solver/Integrator/RayleighNewmark.cpp index 70d604986..0338fcad3 100644 --- a/Solver/Integrator/RayleighNewmark.cpp +++ b/Solver/Integrator/RayleighNewmark.cpp @@ -28,7 +28,7 @@ RayleighNewmark::RayleighNewmark(const unsigned T, const double A, const double , damping_eta(DD) {} void RayleighNewmark::assemble_resistance() { - suanpan::for_all(get_domain().lock()->get_element_pool(), [&](const shared_ptr& t_element) { suanpan::damping::rayleigh::apply(t_element, damping_alpha, damping_beta, damping_zeta, damping_eta); }); + suanpan::for_all(get_domain()->get_element_pool(), [&](const shared_ptr& t_element) { suanpan::damping::rayleigh::apply(t_element, damping_alpha, damping_beta, damping_zeta, damping_eta); }); Newmark::assemble_resistance(); } diff --git a/Solver/Integrator/Tchamwa.cpp b/Solver/Integrator/Tchamwa.cpp new file mode 100644 index 000000000..c81c33e7c --- /dev/null +++ b/Solver/Integrator/Tchamwa.cpp @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ + +#include "Tchamwa.h" +#include +#include + +Tchamwa::Tchamwa(const unsigned T, const double R) + : ExplicitIntegrator(T) + , PHI(2. / (1. + std::max(0., std::min(1., R)))) {} + +void Tchamwa::assemble_resistance() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + auto fa = std::async([&] { D->assemble_resistance(); }); + auto fb = std::async([&] { D->assemble_damping_force(); }); + auto fc = std::async([&] { D->assemble_inertial_force(); }); + + fa.get(); + fb.get(); + fc.get(); + + W->set_sushi(W->get_trial_resistance() + W->get_trial_damping_force() + W->get_trial_inertial_force()); +} + +void Tchamwa::assemble_matrix() { get_domain()->assemble_trial_mass(); } + +int Tchamwa::update_trial_status() { + const auto& D = get_domain(); + auto& W = D->get_factory(); + + W->update_incre_displacement(DT * W->get_current_velocity() + PHI * DT * DT * W->get_current_acceleration()); + W->update_incre_velocity(DT * W->get_current_acceleration()); + + return D->update_trial_status(); +} + +void Tchamwa::update_parameter(const double NT) { DT = NT; } + +void Tchamwa::print() { suanpan_info("A Tchamwa solver.\n"); } diff --git a/Solver/Integrator/Tchamwa.h b/Solver/Integrator/Tchamwa.h new file mode 100644 index 000000000..c7652e3d8 --- /dev/null +++ b/Solver/Integrator/Tchamwa.h @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (C) 2017-2022 Theodore Chang + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ******************************************************************************/ +/** + * @class Tchamwa + * @brief A Tchamwa class defines a solver using Tchamwa algorithm. + * + * @author tlc + * @date 03/12/2022 + * @version 0.1.0 + * @file Tchamwa.h + * @addtogroup Integrator + * @{ + */ + +#ifndef TCHAMWA_H +#define TCHAMWA_H + +#include "Integrator.h" + +class Tchamwa final : public ExplicitIntegrator { + const double PHI; + double DT{0.}; + +public: + Tchamwa(unsigned, double); + + void assemble_resistance() override; + void assemble_matrix() override; + + int update_trial_status() override; + + void update_parameter(double) override; + + void print() override; +}; + +#endif + +//! @} diff --git a/Solver/Integrator/WilsonPenzienNewmark.cpp b/Solver/Integrator/WilsonPenzienNewmark.cpp index 44104c8af..6fdec452a 100644 --- a/Solver/Integrator/WilsonPenzienNewmark.cpp +++ b/Solver/Integrator/WilsonPenzienNewmark.cpp @@ -27,7 +27,7 @@ WilsonPenzienNewmark::WilsonPenzienNewmark(const unsigned T, vec&& DR, const dou int WilsonPenzienNewmark::initialize() { if(SUANPAN_SUCCESS != Newmark::initialize()) return SUANPAN_FAIL; - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); theta.zeros(W->get_size(), damping_ratio.n_elem); @@ -40,7 +40,7 @@ int WilsonPenzienNewmark::process_constraint() { // process constraint for the first time to obtain proper stiffness if(SUANPAN_SUCCESS != Integrator::process_constraint()) return SUANPAN_FAIL; - auto& W = get_domain().lock()->get_factory(); + auto& W = get_domain()->get_factory(); auto& t_stiff = W->get_stiffness(); auto& t_mass = W->get_mass(); @@ -139,7 +139,7 @@ void WilsonPenzienNewmark::reset_status() { } void WilsonPenzienNewmark::assemble_resistance() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_resistance(); }); @@ -156,7 +156,7 @@ void WilsonPenzienNewmark::assemble_resistance() { } void WilsonPenzienNewmark::assemble_matrix() { - const auto& D = get_domain().lock(); + const auto& D = get_domain(); auto& W = D->get_factory(); auto fa = std::async([&] { D->assemble_trial_stiffness(); }); diff --git a/Solver/MPDC.cpp b/Solver/MPDC.cpp index 8736a7ac5..f80962f6f 100644 --- a/Solver/MPDC.cpp +++ b/Solver/MPDC.cpp @@ -10,14 +10,14 @@ MPDC::MPDC(const unsigned T) int MPDC::analyze() { auto& C = get_converger(); auto& G = get_integrator(); - auto& W = G->get_domain().lock()->get_factory(); + const auto& D = G->get_domain(); + auto& W = D->get_factory(); suanpan_info("current analysis time: %.5f.\n", W->get_trial_time()); const auto max_iteration = C->get_max_iteration(); - // ninja anchor - auto& ninja = get_ninja(W); + vec samurai; // get column index for each nonzero dof // uvec load_ref_idx = find(load_ref); @@ -31,54 +31,68 @@ int MPDC::analyze() { unsigned counter = 0; while(true) { + // update for nodes and elements + if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; // process modifiers if(SUANPAN_SUCCESS != G->process_modifier()) return SUANPAN_FAIL; // assemble resistance G->assemble_resistance(); - // assemble stiffness - G->assemble_matrix(); - // process loads - if(SUANPAN_SUCCESS != G->process_load()) return SUANPAN_FAIL; - // process constraints - if(SUANPAN_SUCCESS != G->process_constraint()) return SUANPAN_FAIL; + + if(constant_matrix()) { + // some loads may have resistance + if(SUANPAN_SUCCESS != G->process_load_resistance()) return SUANPAN_FAIL; + // some constraints may have resistance + if(SUANPAN_SUCCESS != G->process_constraint_resistance()) return SUANPAN_FAIL; + } + else { + // assemble stiffness + G->assemble_matrix(); + // process loads + if(SUANPAN_SUCCESS != G->process_load()) return SUANPAN_FAIL; + // process constraints + if(SUANPAN_SUCCESS != G->process_constraint()) return SUANPAN_FAIL; + // indicate the global matrix has been assembled + G->set_matrix_assembled_switch(true); + } // solve ninja - if(SUANPAN_SUCCESS != G->solve(ninja, G->get_displacement_residual())) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != G->solve(samurai, G->get_displacement_residual())) return SUANPAN_FAIL; // solve reference displacement - if(SUANPAN_SUCCESS != G->solve(disp_a, W->get_reference_load())) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != G->solve(disp_a, G->get_reference_load())) return SUANPAN_FAIL; if(const auto n_size = W->get_size(); 0 != W->get_mpc()) { mat right, kernel; auto& border = W->get_auxiliary_stiffness(); if(SUANPAN_SUCCESS != G->solve(right, border)) return SUANPAN_FAIL; auto& aux_lambda = get_auxiliary_lambda(W); - if(!solve(aux_lambda, kernel = border.t() * right.head_rows(n_size), border.t() * ninja.head_rows(n_size) - G->get_auxiliary_residual())) return SUANPAN_FAIL; - ninja -= right * aux_lambda; + if(!solve(aux_lambda, kernel = border.t() * right.head_rows(n_size), border.t() * samurai.head(n_size) - G->get_auxiliary_residual())) return SUANPAN_FAIL; + samurai -= right * aux_lambda; disp_a -= right * solve(kernel, border.t() * disp_a.head_rows(n_size)); } - const vec incre_lambda = solve(mat(disp_a.rows(idx)), W->get_trial_settlement()(idx) - W->get_trial_displacement()(idx) - ninja.rows(idx)); + const vec incre_lambda = solve(mat(disp_a.rows(idx)), W->get_trial_settlement()(idx) - G->get_trial_displacement()(idx) - samurai.rows(idx)); - ninja += disp_a * incre_lambda; + samurai += disp_a * incre_lambda; // avoid machine error accumulation - G->erase_machine_error(); + G->erase_machine_error(samurai); + + // exit if converged + if(C->is_converged(counter)) return G->sync_status(true); + // exit if maximum iteration is hit + if(++counter > max_iteration) return SUANPAN_FAIL; + // update internal variable - G->update_internal(ninja); + G->update_internal(samurai); // update trial load factor G->update_trial_load_factor(incre_lambda); // update trial displacement - G->update_trial_displacement(ninja); + G->update_from_ninja(); // for tracking G->update_load(); // for tracking G->update_constraint(); - // update for nodes and elements - if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; - // exit if converged - if(C->is_converged()) return SUANPAN_SUCCESS; - // exit if maximum iteration is hit - if(++counter > max_iteration) return SUANPAN_FAIL; + if(D->get_attribute(ModalAttribute::LinearSystem)) return G->sync_status(false); } } diff --git a/Solver/Newton.cpp b/Solver/Newton.cpp index faf3d0b8c..77129f084 100644 --- a/Solver/Newton.cpp +++ b/Solver/Newton.cpp @@ -28,7 +28,8 @@ Newton::Newton(const unsigned T, const bool IS) int Newton::analyze() { auto& C = get_converger(); auto& G = get_integrator(); - auto& W = G->get_domain().lock()->get_factory(); + const auto& D = G->get_domain(); + auto& W = D->get_factory(); suanpan_info("current analysis time: %.5f.\n", W->get_trial_time()); @@ -37,20 +38,19 @@ int Newton::analyze() { // iteration counter unsigned counter = 0; - // ninja alias - auto& ninja = get_ninja(W); - - vec pre_ninja; + vec samurai, pre_samurai; auto aitken = false; while(true) { + // update for nodes and elements + if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; // process modifiers if(SUANPAN_SUCCESS != G->process_modifier()) return SUANPAN_FAIL; // assemble resistance G->assemble_resistance(); - if(initial_stiffness && counter != 0) { + if((initial_stiffness && counter != 0) || constant_matrix()) { // some loads may have resistance if(SUANPAN_SUCCESS != G->process_load_resistance()) return SUANPAN_FAIL; // some constraints may have resistance @@ -64,13 +64,15 @@ int Newton::analyze() { if(SUANPAN_SUCCESS != G->process_load()) return SUANPAN_FAIL; // process constraints if(SUANPAN_SUCCESS != G->process_constraint()) return SUANPAN_FAIL; + // indicate the global matrix has been assembled + G->set_matrix_assembled_switch(true); } // call solver - auto flag = G->solve(ninja, G->get_force_residual()); + auto flag = G->solve(samurai, G->get_force_residual()); suanpan_debug([&] { - if(!ninja.is_finite()) { + if(!samurai.is_finite()) { suanpan_fatal("infinite number detected.\n"); flag = SUANPAN_FAIL; } @@ -83,45 +85,49 @@ int Newton::analyze() { } // deal with mpc - if(0 != W->get_mpc()) { - const auto n_size = W->get_size(); + if(const auto n_size = W->get_size(); 0 != W->get_mpc()) { auto& border = W->get_auxiliary_stiffness(); mat right; if(SUANPAN_SUCCESS != G->solve(right, border)) return SUANPAN_FAIL; auto& aux_lambda = get_auxiliary_lambda(W); - if(!solve(aux_lambda, border.t() * right.head_rows(n_size), border.t() * ninja.head_rows(n_size) - G->get_auxiliary_residual())) return SUANPAN_FAIL; - ninja -= right * aux_lambda; + if(!solve(aux_lambda, border.t() * right.head_rows(n_size), border.t() * samurai.head(n_size) - G->get_auxiliary_residual())) return SUANPAN_FAIL; + samurai -= right * aux_lambda; } if(initial_stiffness) { if(!aitken) { aitken = true; - pre_ninja = ninja; + pre_samurai = samurai; } else { aitken = false; - const vec diff_ninja = pre_ninja - ninja; - ninja *= dot(pre_ninja, diff_ninja) / dot(diff_ninja, diff_ninja); + const vec diff_samurai = pre_samurai - samurai; + samurai *= dot(pre_samurai, diff_samurai) / dot(diff_samurai, diff_samurai); } } // avoid machine error accumulation - G->erase_machine_error(); + G->erase_machine_error(samurai); + + // exit if converged + // call corrector if it exists + if(C->is_converged(counter)) return G->sync_status(true); + // exit if maximum iteration is hit + if(++counter > max_iteration) return SUANPAN_FAIL; + // update internal variable - G->update_internal(ninja); + G->update_internal(samurai); // update trial status for factory - G->update_trial_displacement(ninja); + G->update_from_ninja(); // for tracking G->update_load(); // for tracking G->update_constraint(); - // update for nodes and elements - if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; - // exit if converged - if(C->is_converged()) return SUANPAN_SUCCESS; - // exit if maximum iteration is hit - if(++counter > max_iteration) return SUANPAN_FAIL; + // fast handling for linear elastic case + // sync status using newly computed increment across elements and nodes + // this may just call predictor or call corrector + if(D->get_attribute(ModalAttribute::LinearSystem)) return G->sync_status(false); } } diff --git a/Solver/Ramm.cpp b/Solver/Ramm.cpp index 8a03ddc5d..1c0ff6fa8 100644 --- a/Solver/Ramm.cpp +++ b/Solver/Ramm.cpp @@ -29,23 +29,22 @@ Ramm::Ramm(const unsigned T, const double L, const bool F) int Ramm::analyze() { auto& C = get_converger(); auto& G = get_integrator(); - auto& W = G->get_domain().lock()->get_factory(); + auto& W = G->get_domain()->get_factory(); suanpan_info("current load level: %+.5f.\n", W->get_trial_load_factor().at(0)); const auto max_iteration = C->get_max_iteration(); - // ninja anchor - auto& t_ninja = get_ninja(W); - double t_lambda; - vec disp_a, disp_ref; + vec samurai, disp_a, disp_ref; // iteration counter unsigned counter = 0; while(true) { + // update for nodes and elements + if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; // process modifiers if(SUANPAN_SUCCESS != G->process_modifier()) return SUANPAN_FAIL; // assemble resistance @@ -58,21 +57,21 @@ int Ramm::analyze() { if(SUANPAN_SUCCESS != G->process_constraint()) return SUANPAN_FAIL; // solve ninja - if(SUANPAN_SUCCESS != G->solve(t_ninja, G->get_displacement_residual())) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != G->solve(samurai, G->get_displacement_residual())) return SUANPAN_FAIL; // solve reference displacement - if(SUANPAN_SUCCESS != G->solve(disp_a, W->get_reference_load())) return SUANPAN_FAIL; + if(SUANPAN_SUCCESS != G->solve(disp_a, G->get_reference_load())) return SUANPAN_FAIL; - if(0 != W->get_mpc()) { + if(const auto n_size = W->get_size(); 0 != W->get_mpc()) { mat right, kernel; auto& border = W->get_auxiliary_stiffness(); if(SUANPAN_SUCCESS != G->solve(right, border)) return SUANPAN_FAIL; auto& aux_lambda = get_auxiliary_lambda(W); - if(!solve(aux_lambda, kernel = border.t() * right, border.t() * t_ninja - G->get_auxiliary_residual())) return SUANPAN_FAIL; - t_ninja -= right * aux_lambda; - disp_a -= right * solve(kernel, border.t() * disp_a); + if(!solve(aux_lambda, kernel = border.t() * right.head_rows(n_size), border.t() * samurai.head(n_size) - G->get_auxiliary_residual())) return SUANPAN_FAIL; + samurai -= right * aux_lambda; + disp_a -= right * solve(kernel, border.t() * disp_a.head_rows(n_size)); } - if(0 < counter) t_lambda = -dot(disp_ref, t_ninja) / dot(disp_ref, disp_a); + if(0 < counter) t_lambda = -dot(disp_ref, samurai) / dot(disp_ref, disp_a); else { t_lambda = arc_length / sqrt(dot(disp_a, disp_a) + 1.); @@ -83,33 +82,32 @@ int Ramm::analyze() { // abaqus update disp_ref = disp_a; - t_ninja += disp_a * t_lambda; + samurai += disp_a * t_lambda; // avoid machine error accumulation - G->erase_machine_error(); + G->erase_machine_error(samurai); + + // exit if converged + if(C->is_converged(counter)) { + if(!fixed_arc_length) arc_length *= sqrt(max_iteration / static_cast(counter)); + return SUANPAN_SUCCESS; + } + // exit if maximum iteration is hit + if(++counter > max_iteration) { + if(!fixed_arc_length) arc_length *= .5; + return SUANPAN_FAIL; + } + // update trial displacement - W->update_trial_displacement_by(t_ninja); + G->update_from_ninja(); // update trial load factor - W->update_trial_load_factor_by(vec{t_lambda}); + G->update_trial_load_factor(vec{t_lambda}); // set time to load factor - W->update_trial_time(W->get_trial_load_factor().at(0)); + G->update_trial_time(W->get_trial_load_factor().at(0)); // for tracking G->update_load(); // for tracking G->update_constraint(); - // update for nodes and elements - if(SUANPAN_SUCCESS != G->update_trial_status()) return SUANPAN_FAIL; - - // exit if maximum iteration is hit - if(++counter == max_iteration) { - if(!fixed_arc_length) arc_length *= .5; - return SUANPAN_FAIL; - } - // exit if converged - if(C->is_converged()) { - if(!fixed_arc_length) arc_length *= sqrt(max_iteration / static_cast(counter)); - return SUANPAN_SUCCESS; - } } } diff --git a/Solver/Solver.cpp b/Solver/Solver.cpp index 20fa8e9d6..7bde93c9e 100644 --- a/Solver/Solver.cpp +++ b/Solver/Solver.cpp @@ -18,6 +18,8 @@ #include "Solver.h" #include #include +#include +#include Solver::Solver(const unsigned T) : Tag(T) { suanpan_debug("Solver %u ctor() called.\n", get_tag()); } @@ -25,12 +27,12 @@ Solver::Solver(const unsigned T) Solver::~Solver() { suanpan_debug("Solver %u dtor() called.\n", get_tag()); } int Solver::initialize() { - if(converger == nullptr) { + if(nullptr == converger) { suanpan_error("initialize() needs a valid converger.\n"); return SUANPAN_FAIL; } - if(modifier == nullptr) { + if(nullptr == modifier) { suanpan_error("initialize() needs a valid integrator.\n"); return SUANPAN_FAIL; } @@ -45,3 +47,16 @@ const shared_ptr& Solver::get_converger() const { return converger; } void Solver::set_integrator(const shared_ptr& G) { modifier = G; } const shared_ptr& Solver::get_integrator() const { return modifier; } + +bool Solver::constant_matrix() const { + auto& G = get_integrator(); + const auto& D = G->get_domain(); + auto& S = D->get_current_step(); + + // need to satisfy a number of conditions: + // 1. fixed step size + // 2. if not fixed step size, the effective stiffness needs to be independent from time + // 3. the system needs to be linear + // 4. the effective stiffness has been assembled + return (S->is_fixed_step_size() || G->time_independent_matrix()) && D->get_attribute(ModalAttribute::LinearSystem) && G->matrix_is_assembled(); +} diff --git a/Solver/Solver.h b/Solver/Solver.h index 379154816..0220642d4 100644 --- a/Solver/Solver.h +++ b/Solver/Solver.h @@ -56,6 +56,8 @@ class Solver : public Tag { void set_integrator(const shared_ptr&); [[nodiscard]] const shared_ptr& get_integrator() const; + + [[nodiscard]] bool constant_matrix() const; }; #endif diff --git a/Solver/SolverParser.cpp b/Solver/SolverParser.cpp index 9a86ca48d..35925d56a 100644 --- a/Solver/SolverParser.cpp +++ b/Solver/SolverParser.cpp @@ -221,16 +221,67 @@ int create_new_integrator(const shared_ptr& domain, istringstream& c if(domain->insert(make_shared(tag, std::move(pool)))) code = 1; } else if(is_equal(integrator_type, "GSSSSOptimal")) { - double radius = .5; + auto radius = .5; + if(!get_optional_input(command, radius)) { + suanpan_error("create_new_integrator() needs a valid damping radius.\n"); + return SUANPAN_SUCCESS; + } + + if(domain->insert(make_shared(tag, std::max(0., std::min(radius, 1.))))) code = 1; + } + else if(is_equal(integrator_type, "OALTS")) { + auto radius = .5; + if(!get_optional_input(command, radius)) { + suanpan_error("create_new_integrator() needs a valid damping radius.\n"); + return SUANPAN_SUCCESS; + } + + if(domain->insert(make_shared(tag, std::max(0., std::min(radius, 1.))))) code = 1; + } + else if(is_equal(integrator_type, "BatheTwoStep")) { + auto radius = 0.; + if(!get_optional_input(command, radius)) { + suanpan_error("create_new_integrator() needs a valid damping radius.\n"); + return SUANPAN_SUCCESS; + } + radius = std::max(0., std::min(radius, 1.)); + + auto gamma = .5; + if(!get_optional_input(command, gamma)) { + suanpan_error("create_new_integrator() needs a valid gamma.\n"); + return SUANPAN_SUCCESS; + } + if(gamma <= 0. || gamma >= 1.) gamma = .5; + + if(domain->insert(make_shared(tag, radius, gamma))) code = 1; + } + else if(is_equal(integrator_type, "Tchamwa")) { + auto radius = .5; + if(!get_optional_input(command, radius)) { + suanpan_error("create_new_integrator() needs a valid damping radius.\n"); + return SUANPAN_SUCCESS; + } - if(!get_input(command, radius)) { + if(domain->insert(make_shared(tag, std::max(0., std::min(radius, 1.))))) code = 1; + } + else if(is_equal(integrator_type, "BatheExplicit")) { + auto radius = .5; + if(!get_optional_input(command, radius)) { suanpan_error("create_new_integrator() needs a valid damping radius.\n"); return SUANPAN_SUCCESS; } - if(domain->insert(make_shared(tag, radius))) code = 1; + if(domain->insert(make_shared(tag, std::max(0., std::min(radius, 1.))))) code = 1; + } + else if(is_equal(integrator_type, "GeneralizedAlphaExplicit") || is_equal(integrator_type, "GeneralisedAlphaExplicit")) { + auto radius = .5; + if(!get_optional_input(command, radius)) { + suanpan_error("create_new_integrator() needs a valid damping radius.\n"); + return SUANPAN_SUCCESS; + } + + if(domain->insert(make_shared(tag, std::max(0., std::min(radius, 1.))))) code = 1; } - else if(is_equal(integrator_type, "BatheTwoStep") && domain->insert(make_shared(tag))) code = 1; if(1 == code) { if(0 != domain->get_current_step_tag()) domain->get_current_step()->set_integrator_tag(tag); @@ -282,35 +333,26 @@ int create_new_solver(const shared_ptr& domain, istringstream& comma if(domain->insert(make_shared(tag, arc_length, is_true(fixed_arc_length)))) code = 1; } - else if(is_equal(solver_type, "FEAST")) { + else if(is_equal(solver_type, "FEAST") || is_equal(solver_type, "QuadraticFEAST")) { unsigned eigen_number; if(!get_input(command, eigen_number)) { suanpan_error("create_new_solver() requires a valid number of frequencies.\n"); return SUANPAN_SUCCESS; } - double radius; - if(!get_input(command, radius)) { + double centre; + if(!get_input(command, centre)) { suanpan_error("create_new_solver() requires a valid radius.\n"); return SUANPAN_SUCCESS; } - if(domain->insert(make_shared(tag, eigen_number, radius, false))) code = 1; - } - else if(is_equal(solver_type, "QuadraticFEAST")) { - unsigned eigen_number; - if(!get_input(command, eigen_number)) { - suanpan_error("create_new_solver() requires a valid number of frequencies.\n"); - return SUANPAN_SUCCESS; - } - - double radius; - if(!get_input(command, radius)) { + auto radius = centre; + if(!get_optional_input(command, radius)) { suanpan_error("create_new_solver() requires a valid radius.\n"); return SUANPAN_SUCCESS; } - if(domain->insert(make_shared(tag, eigen_number, radius, true))) code = 1; + if(domain->insert(make_shared(tag, eigen_number, centre, radius, is_equal(solver_type, "QuadraticFEAST")))) code = 1; } else if(is_equal(solver_type, "DisplacementControl") || is_equal(solver_type, "MPDC")) { if(domain->insert(make_shared(tag))) code = 1; } else suanpan_error("create_new_solver() cannot identify solver type.\n"); diff --git a/Step/ArcLength.cpp b/Step/ArcLength.cpp index acacab092..70cf0af6b 100644 --- a/Step/ArcLength.cpp +++ b/Step/ArcLength.cpp @@ -41,7 +41,7 @@ int ArcLength::initialize() { modifier->set_domain(t_domain); // solver - if(nullptr != solver) if(const auto& t_solver = *solver; typeid(t_solver) != typeid(Ramm)) solver = nullptr; + if(nullptr != solver) if(!dynamic_cast(solver.get())) solver = nullptr; if(nullptr == solver) solver = make_shared(); solver->set_converger(tester); solver->set_integrator(modifier); diff --git a/Step/Bead.cpp b/Step/Bead.cpp index ff70332ad..8384bf1f7 100644 --- a/Step/Bead.cpp +++ b/Step/Bead.cpp @@ -67,15 +67,21 @@ int Bead::precheck() { } int Bead::analyze() { - for(const auto& [d_tag, t_domain] : domain_pool) - if(t_domain->is_active()) - for(const auto& [s_tag, t_step] : t_domain->get_step_pool()) { - t_domain->set_current_step_tag(t_step->get_tag()); - t_step->set_domain(t_domain); - if(SUANPAN_FAIL == t_step->Step::initialize()) return SUANPAN_FAIL; - if(SUANPAN_FAIL == t_step->initialize()) return SUANPAN_FAIL; - if(SUANPAN_FAIL == t_step->analyze()) return SUANPAN_FAIL; + for(const auto& [d_tag, t_domain] : domain_pool) { + if(!t_domain->is_active()) continue; + bool initial_record = true; + for(const auto& [s_tag, t_step] : t_domain->get_step_pool()) { + t_domain->set_current_step_tag(t_step->get_tag()); + t_step->set_domain(t_domain); + if(SUANPAN_FAIL == t_step->Step::initialize()) return SUANPAN_FAIL; + if(SUANPAN_FAIL == t_step->initialize()) return SUANPAN_FAIL; + if(initial_record) { + initial_record = false; + t_domain->record(); } + if(SUANPAN_FAIL == t_step->analyze()) return SUANPAN_FAIL; + } + } return SUANPAN_SUCCESS; } diff --git a/Step/Buckle.cpp b/Step/Buckle.cpp index eb9502d04..93669044a 100644 --- a/Step/Buckle.cpp +++ b/Step/Buckle.cpp @@ -66,7 +66,10 @@ int Buckle::analyze() { if(SUANPAN_SUCCESS != G->process_constraint()) return SUANPAN_FAIL; - if(eig_solve(get_eigenvalue(W), get_eigenvector(W), W->get_stiffness(), W->get_geometry()) != SUANPAN_SUCCESS) return SUANPAN_FAIL; + const shared_ptr t_geometry = W->get_geometry()->make_copy(); + t_geometry *= -1.; + + if(eig_solve(get_eigenvalue(W), get_eigenvector(W), W->get_stiffness(), t_geometry, 1, "SM") != SUANPAN_SUCCESS) return SUANPAN_FAIL; suanpan_info("\nbuckling load multiplier: %.8E.\n", W->get_eigenvalue().at(0)); diff --git a/Step/Dynamic.cpp b/Step/Dynamic.cpp index 2efb7b6ca..33ec92eac 100644 --- a/Step/Dynamic.cpp +++ b/Step/Dynamic.cpp @@ -21,14 +21,14 @@ #include #include #include -#include -#include +#include #include #include #include -Dynamic::Dynamic(const unsigned T, const double P) - : Step(T, P) {} +Dynamic::Dynamic(const unsigned T, const double P, const IntegratorType AT) + : Step(T, P) + , analysis_type(AT) {} int Dynamic::initialize() { configure_storage_scheme(); @@ -45,11 +45,21 @@ int Dynamic::initialize() { // integrator if(nullptr == modifier) modifier = make_shared(); + else if(IntegratorType::Implicit == analysis_type) { + if(IntegratorType::Implicit != modifier->type()) { + suanpan_error("an implicit integrator is required.\n"); + return SUANPAN_FAIL; + } + } + else if(IntegratorType::Implicit == modifier->type()) { + suanpan_error("an explicit integrator is required.\n"); + return SUANPAN_FAIL; + } modifier->set_domain(t_domain); // solver // avoid arc length solver - if(nullptr != solver) if(const auto& t_solver = *solver; typeid(t_solver) == typeid(Ramm)) solver = nullptr; + if(nullptr != solver) if(dynamic_cast(solver.get())) solver = nullptr; // automatically enable displacement controlled solver if(nullptr == solver) { auto flag = false; @@ -61,8 +71,7 @@ int Dynamic::initialize() { flag ? solver = make_shared() : solver = make_shared(); } - const auto& t_solver = *solver; - if(const auto& t_modifier = *modifier; typeid(t_solver) == typeid(BFGS) && typeid(t_modifier) == typeid(LeeNewmark) && typeid(t_modifier) == typeid(LeeNewmarkFull)) { + if(dynamic_cast(solver.get()) && dynamic_cast(modifier.get())) { suanpan_error("currently BFGS solver is not supported by Lee damping model.\n"); return SUANPAN_FAIL; } @@ -74,22 +83,17 @@ int Dynamic::initialize() { if(SUANPAN_SUCCESS != modifier->initialize()) return SUANPAN_FAIL; if(SUANPAN_SUCCESS != solver->initialize()) return SUANPAN_FAIL; - modifier->update_parameter(get_ini_step_size()); - modifier->update_compatibility(); - return SUANPAN_SUCCESS; } int Dynamic::analyze() { auto& S = get_solver(); auto& G = get_integrator(); + auto& W = get_factory(); auto remain_time = get_time_period(); auto step_time = get_ini_step_size(); - // record initial state - // if(W->get_current_time() == 0.) G->record(); - unsigned num_increment = 0, num_converged_step = 0; while(true) { @@ -104,18 +108,20 @@ int Dynamic::analyze() { G->update_incre_time(step_time); if(const auto code = S->analyze(); SUANPAN_SUCCESS == code) { // success step + // eat current increment + set_time_left(remain_time -= W->get_incre_time()); // commit converged iteration G->stage_and_commit_status(); // record response G->record(); - // eat current increment - set_time_left(remain_time -= step_time); - if(!is_fixed_step_size() && ++num_converged_step > 5 && G->allow_to_change_time_step()) { - step_time = std::min(get_max_step_size(), step_time * time_step_amplification); - num_converged_step = 0; + if(G->allow_to_change_time_step()) { + if(!is_fixed_step_size() && ++num_converged_step > 5) { + step_time = std::min(get_max_step_size(), step_time * time_step_amplification); + num_converged_step = 0; + } + // check if time overflows + if(step_time > remain_time) step_time = remain_time; } - // check if time overflows - if(step_time > remain_time) step_time = remain_time; } else if(SUANPAN_FAIL == code) { // failed step diff --git a/Step/Dynamic.h b/Step/Dynamic.h index 59212d2c4..6a0f746c6 100644 --- a/Step/Dynamic.h +++ b/Step/Dynamic.h @@ -30,11 +30,15 @@ #include +enum class IntegratorType; + class Dynamic final : public Step { const double time_step_amplification = 1.2; + const IntegratorType analysis_type; + public: - explicit Dynamic(unsigned = 0, double = 1.); + Dynamic(unsigned, double, IntegratorType); int initialize() override; diff --git a/Step/Frequency.cpp b/Step/Frequency.cpp index ad51ad950..38c5c71f7 100644 --- a/Step/Frequency.cpp +++ b/Step/Frequency.cpp @@ -21,9 +21,10 @@ #include #include -Frequency::Frequency(const unsigned T, const unsigned N) +Frequency::Frequency(const unsigned T, const unsigned N, const char TP) : Step(T, 0.) - , eigen_number(N) {} + , eigen_number(N) + , eigen_type(TP) {} int Frequency::initialize() { configure_storage_scheme(); @@ -37,7 +38,7 @@ int Frequency::initialize() { modifier->set_domain(t_domain); // solver - if(nullptr == solver) solver = make_shared(0, eigen_number); + if(nullptr == solver) solver = make_shared(0, eigen_number, eigen_type); solver->set_integrator(modifier); if(SUANPAN_SUCCESS != modifier->initialize()) return SUANPAN_FAIL; diff --git a/Step/Frequency.h b/Step/Frequency.h index 2008763f5..6e30018cf 100644 --- a/Step/Frequency.h +++ b/Step/Frequency.h @@ -32,9 +32,10 @@ class Frequency final : public Step { const unsigned eigen_number; + const char eigen_type; public: - explicit Frequency(unsigned = 0, unsigned = 4); + explicit Frequency(unsigned = 0, unsigned = 4, char = 'S'); int initialize() override; diff --git a/Step/Static.cpp b/Step/Static.cpp index 7f988ef10..38177eec5 100644 --- a/Step/Static.cpp +++ b/Step/Static.cpp @@ -73,9 +73,6 @@ int Static::analyze() { auto remain_time = get_time_period(); auto step_time = get_ini_step_size(); - // record initial state - // if(W->get_current_time() == 0.) G->record(); - unsigned num_increment = 0, num_converged_step = 0; while(true) { diff --git a/Step/StepParser.cpp b/Step/StepParser.cpp index 1a96bb8a7..347ae3dbb 100644 --- a/Step/StepParser.cpp +++ b/Step/StepParser.cpp @@ -19,6 +19,7 @@ #include #include #include +#include int create_new_step(const shared_ptr& domain, istringstream& command) { string step_type; @@ -39,7 +40,14 @@ int create_new_step(const shared_ptr& domain, istringstream& command suanpan_error("create_new_step() reads a wrong number of eigenvalues.\n"); return SUANPAN_SUCCESS; } - if(domain->insert(make_shared(tag, eigen_number))) domain->set_current_step_tag(tag); + + char type = 's'; + if(!get_optional_input(command, type)) { + suanpan_error("create_new_step() needs a correct eigenvalue type.\n"); + return SUANPAN_SUCCESS; + } + + if(domain->insert(make_shared(tag, eigen_number, suanpan::to_upper(type)))) domain->set_current_step_tag(tag); else suanpan_error("create_new_step() cannot create the new step.\n"); } else if(is_equal(step_type, "Buckling") || is_equal(step_type, "Buckle")) { @@ -64,13 +72,22 @@ int create_new_step(const shared_ptr& domain, istringstream& command if(domain->insert(make_shared(tag, time))) domain->set_current_step_tag(tag); else suanpan_error("create_new_step() cannot create the new step.\n"); } - else if(is_equal(step_type, "Dynamic")) { + else if(is_equal(step_type, "Dynamic") || is_equal(step_type, "ImplicitDynamic")) { + auto time = 1.; + if(!command.eof() && !get_input(command, time)) { + suanpan_error("create_new_step() reads a wrong time period.\n"); + return SUANPAN_SUCCESS; + } + if(domain->insert(make_shared(tag, time, IntegratorType::Implicit))) domain->set_current_step_tag(tag); + else suanpan_error("create_new_step() cannot create the new step.\n"); + } + else if(is_equal(step_type, "ExplicitDynamic")) { auto time = 1.; if(!command.eof() && !get_input(command, time)) { suanpan_error("create_new_step() reads a wrong time period.\n"); return SUANPAN_SUCCESS; } - if(domain->insert(make_shared(tag, time))) domain->set_current_step_tag(tag); + if(domain->insert(make_shared(tag, time, IntegratorType::Explicit))) domain->set_current_step_tag(tag); else suanpan_error("create_new_step() cannot create the new step.\n"); } else if(is_equal(step_type, "ArcLength")) { diff --git a/Toolbox/argumentParser.cpp b/Toolbox/argumentParser.cpp index 24cd80525..8512fc35a 100644 --- a/Toolbox/argumentParser.cpp +++ b/Toolbox/argumentParser.cpp @@ -35,8 +35,8 @@ using std::ifstream; using std::ofstream; constexpr auto SUANPAN_MAJOR = 2; -constexpr auto SUANPAN_MINOR = 6; -constexpr auto SUANPAN_PATCH = 1; +constexpr auto SUANPAN_MINOR = 7; +constexpr auto SUANPAN_PATCH = 0; constexpr auto SUANPAN_CODE = "Betelgeuse"; constexpr auto SUANPAN_ARCH = 64; diff --git a/Toolbox/arpack-src/ccdotc.f b/Toolbox/arpack-src/ccdotc.f new file mode 100644 index 000000000..f0f94f422 --- /dev/null +++ b/Toolbox/arpack-src/ccdotc.f @@ -0,0 +1,36 @@ + complex function ccdotc(n,zx,incx,zy,incy) +c +c forms the dot product of a vector. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n + ztemp = (0.0d0,0.0d0) + ccdotc = (0.0d0,0.0d0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = ztemp + conjg(zx(ix))*zy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + ccdotc = ztemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ztemp = ztemp + conjg(zx(i))*zy(i) + 30 continue + ccdotc = ztemp + return + end diff --git a/Toolbox/arpack-src/cgetv0.f b/Toolbox/arpack-src/cgetv0.f index cde4e655a..b49e66708 100644 --- a/Toolbox/arpack-src/cgetv0.f +++ b/Toolbox/arpack-src/cgetv0.f @@ -2,13 +2,13 @@ c c\Name: cgetv0 c -c\Description: +c\Description: c Generate a random initial residual vector for the Arnoldi process. -c Force the residual vector to be in the range of the operator OP. +c Force the residual vector to be in the range of the operator OP. c c\Usage: c call cgetv0 -c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, +c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, c IPNTR, WORKD, IERR ) c c\Arguments @@ -35,7 +35,7 @@ c B = 'G' -> generalized eigenvalue problem A*x = lambda*B*x c c ITRY Integer. (INPUT) -c ITRY counts the number of times that cgetv0 is called. +c ITRY counts the number of times that cgetv0 is called. c It should be set to 1 on the initial call to cgetv0. c c INITV Logical variable. (INPUT) @@ -54,11 +54,11 @@ c if this is a "restart". c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c RESID Complex array of length N. (INPUT/OUTPUT) -c Initial residual vector to be generated. If RESID is +c Initial residual vector to be generated. If RESID is c provided, force RESID into the range of the operator OP. c c RNORM Real scalar. (OUTPUT) @@ -89,21 +89,21 @@ c pp 357-385. c c\Routines called: -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c cvout ARPACK utility routine that prints vectors. -c clarnv LAPACK routine for generating a random vector. +c clarnv LAPACK routine for generating a random vector. c cgemv Level 2 BLAS routine for matrix vector multiplication. c ccopy Level 1 BLAS that copies one vector to another. c cdotc Level 1 BLAS that computes the scalar product of two vectors. -c scnrm2 Level 1 BLAS that computes the norm of a vector. +c scnrm2 Level 1 BLAS that computes the norm of a vector. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: getv0.F SID: 2.3 DATE OF SID: 08/27/96 RELEASE: 2 @@ -112,16 +112,16 @@ c c----------------------------------------------------------------------- c - subroutine cgetv0 - & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, + subroutine cgetv0 + & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, & ipntr, workd, ierr ) -c +c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -156,35 +156,29 @@ subroutine cgetv0 c | Local Scalars & Arrays | c %------------------------% c - logical first, inits, orth + logical first, orth integer idist, iseed(4), iter, msglvl, jj Real & rnorm0 Complex & cnorm - save first, iseed, inits, iter, msglvl, orth, rnorm0 + save first, iseed, iter, msglvl, orth, rnorm0 c c %----------------------% c | External Subroutines | c %----------------------% c - external ccopy, cgemv, clarnv, cvout, second + external ccopy, cgemv, clarnv, cvout, arscnd c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & scnrm2, slapy2 Complex - & cdotc - external cdotc, scnrm2, slapy2 -c -c %-----------------% -c | Data Statements | -c %-----------------% -c - data inits /.true./ + & ccdotc + external ccdotc, scnrm2, slapy2 c c %-----------------------% c | Executable Statements | @@ -196,24 +190,21 @@ subroutine cgetv0 c | random number generator | c %-----------------------------------% c - if (inits) then - iseed(1) = 1 - iseed(2) = 3 - iseed(3) = 5 - iseed(4) = 7 - inits = .false. - end if + iseed(1) = 1 + iseed(2) = 3 + iseed(3) = 5 + iseed(4) = 7 c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mgetv0 -c +c ierr = 0 iter = 0 first = .FALSE. @@ -232,48 +223,50 @@ subroutine cgetv0 idist = 2 call clarnv (idist, iseed, n, resid) end if -c +c c %----------------------------------------------------------% c | Force the starting vector into the range of OP to handle | c | the generalized problem when B is possibly (singular). | c %----------------------------------------------------------% c - call second (t2) - if (bmat .eq. 'G') then + call arscnd (t2) + if (itry .eq. 1) then nopx = nopx + 1 ipntr(1) = 1 ipntr(2) = n + 1 call ccopy (n, resid, 1, workd, 1) ido = -1 go to 9000 + else if (itry .gt. 1 .and. bmat .eq. 'G') then + call ccopy (n, resid, 1, workd(n + 1), 1) end if end if -c +c c %----------------------------------------% -c | Back from computing B*(initial-vector) | +c | Back from computing OP*(initial-vector) | c %----------------------------------------% c if (first) go to 20 c c %-----------------------------------------------% -c | Back from computing B*(orthogonalized-vector) | +c | Back from computing OP*(orthogonalized-vector) | c %-----------------------------------------------% c if (orth) go to 40 -c - call second (t3) +c + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) -c +c c %------------------------------------------------------% c | Starting vector is now in the range of OP; r = OP*r; | c | Compute B-norm of starting vector. | c %------------------------------------------------------% c - call second (t2) + call arscnd (t2) first = .TRUE. + if (itry .eq. 1) call ccopy (n, workd(n + 1), 1, resid, 1) if (bmat .eq. 'G') then nbx = nbx + 1 - call ccopy (n, workd(n+1), 1, resid, 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 @@ -281,17 +274,17 @@ subroutine cgetv0 else if (bmat .eq. 'I') then call ccopy (n, resid, 1, workd, 1) end if -c +c 20 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c first = .FALSE. if (bmat .eq. 'G') then - cnorm = cdotc (n, resid, 1, workd, 1) + cnorm = ccdotc (n, resid, 1, workd, 1) rnorm0 = sqrt(slapy2(real(cnorm),aimag(cnorm))) else if (bmat .eq. 'I') then rnorm0 = scnrm2(n, resid, 1) @@ -303,7 +296,7 @@ subroutine cgetv0 c %---------------------------------------------% c if (j .eq. 1) go to 50 -c +c c %---------------------------------------------------------------- c | Otherwise need to B-orthogonalize the starting vector against | c | the current Arnoldi basis using Gram-Schmidt with iter. ref. | @@ -319,16 +312,16 @@ subroutine cgetv0 orth = .TRUE. 30 continue c - call cgemv ('C', n, j-1, one, v, ldv, workd, 1, + call cgemv ('C', n, j-1, one, v, ldv, workd, 1, & zero, workd(n+1), 1) - call cgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, + call cgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, & one, resid, 1) -c +c c %----------------------------------------------------------% c | Compute the B-norm of the orthogonalized starting vector | c %----------------------------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call ccopy (n, resid, 1, workd(n+1), 1) @@ -339,16 +332,16 @@ subroutine cgetv0 else if (bmat .eq. 'I') then call ccopy (n, resid, 1, workd, 1) end if -c +c 40 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c if (bmat .eq. 'G') then - cnorm = cdotc (n, resid, 1, workd, 1) + cnorm = ccdotc (n, resid, 1, workd, 1) rnorm = sqrt(slapy2(real(cnorm),aimag(cnorm))) else if (bmat .eq. 'I') then rnorm = scnrm2(n, resid, 1) @@ -359,14 +352,14 @@ subroutine cgetv0 c %--------------------------------------% c if (msglvl .gt. 2) then - call svout (logfil, 1, rnorm0, ndigit, + call svout (logfil, 1, [rnorm0], ndigit, & '_getv0: re-orthonalization ; rnorm0 is') - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_getv0: re-orthonalization ; rnorm is') end if c if (rnorm .gt. 0.717*rnorm0) go to 50 -c +c iter = iter + 1 if (iter .le. 1) then c @@ -388,11 +381,11 @@ subroutine cgetv0 rnorm = rzero ierr = -1 end if -c +c 50 continue c if (msglvl .gt. 0) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_getv0: B-norm of initial / restarted starting vector') end if if (msglvl .gt. 2) then @@ -400,10 +393,10 @@ subroutine cgetv0 & '_getv0: initial / restarted starting vector') end if ido = 99 -c - call second (t1) +c + call arscnd (t1) tgetv0 = tgetv0 + (t1 - t0) -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/cmout.f b/Toolbox/arpack-src/cmout.f index 1cdaf33e9..ff0478309 100644 --- a/Toolbox/arpack-src/cmout.f +++ b/Toolbox/arpack-src/cmout.f @@ -74,34 +74,34 @@ SUBROUTINE CMOUT( LOUT, M, N, A, LDA, IDIGIT, IFMT ) IF (K1.NE.N) THEN WRITE( LOUT, 9994 )I, ( A( I, J ), J = K1, K2 ) ELSE - WRITE( LOUT, 9984 )I, ( A( I, J ), J = K1, K2 ) + WRITE( LOUT, 9984 )I, ( A( I, J ), J = K1, K2 ) END IF 30 CONTINUE 40 CONTINUE * ELSE IF( NDIGIT.LE.6 ) THEN - DO 60 K1 = 1, N, 2 + DO 60 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) WRITE( LOUT, 9997 )( ICOL, I, I = K1, K2 ) DO 50 I = 1, M IF (K1.NE.N) THEN WRITE( LOUT, 9993 )I, ( A( I, J ), J = K1, K2 ) - ELSE - WRITE( LOUT, 9983 )I, ( A( I, J ), J = K1, K2 ) + ELSE + WRITE( LOUT, 9983 )I, ( A( I, J ), J = K1, K2 ) END IF 50 CONTINUE 60 CONTINUE * ELSE IF( NDIGIT.LE.8 ) THEN - DO 80 K1 = 1, N, 2 + DO 80 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) WRITE( LOUT, 9996 )( ICOL, I, I = K1, K2 ) DO 70 I = 1, M IF (K1.NE.N) THEN WRITE( LOUT, 9992 )I, ( A( I, J ), J = K1, K2 ) ELSE - WRITE( LOUT, 9982 )I, ( A( I, J ), J = K1, K2 ) - END IF + WRITE( LOUT, 9982 )I, ( A( I, J ), J = K1, K2 ) + END IF 70 CONTINUE 80 CONTINUE * @@ -124,20 +124,20 @@ SUBROUTINE CMOUT( LOUT, M, N, A, LDA, IDIGIT, IFMT ) K2 = MIN0( N, K1+3 ) WRITE( LOUT, 9998 )( ICOL, I, I = K1, K2 ) DO 110 I = 1, M - IF ((K1+3).LE.N) THEN + IF ((K1+3).LE.N) THEN WRITE( LOUT, 9974 )I, ( A( I, J ), J = K1, K2 ) ELSE IF ((K1+3-N).EQ.1) THEN WRITE( LOUT, 9964 )I, ( A( I, J ), J = k1, K2 ) ELSE IF ((K1+3-N).EQ.2) THEN WRITE( LOUT, 9954 )I, ( A( I, J ), J = K1, K2 ) ELSE IF ((K1+3-N).EQ.3) THEN - WRITE( LOUT, 9944 )I, ( A( I, J ), J = K1, K2 ) + WRITE( LOUT, 9944 )I, ( A( I, J ), J = K1, K2 ) END IF 110 CONTINUE 120 CONTINUE * ELSE IF( NDIGIT.LE.6 ) THEN - DO 140 K1 = 1, N, 3 + DO 140 K1 = 1, N, 3 K2 = MIN0( N, K1+ 2) WRITE( LOUT, 9997 )( ICOL, I, I = K1, K2 ) DO 130 I = 1, M @@ -185,14 +185,14 @@ SUBROUTINE CMOUT( LOUT, M, N, A, LDA, IDIGIT, IFMT ) 9998 FORMAT( 11X, 4( 9X, 3A1, I4, 9X ) ) 9997 FORMAT( 10X, 4( 11X, 3A1, I4, 11X ) ) 9996 FORMAT( 10X, 3( 13X, 3A1, I4, 13X ) ) - 9995 FORMAT( 12X, 2( 18x, 3A1, I4, 18X ) ) + 9995 FORMAT( 12X, 2( 18x, 3A1, I4, 18X ) ) * *======================================================== * FORMAT FOR 72 COLUMN *======================================================== * * DISPLAY 4 SIGNIFICANT DIGITS -* +* 9994 FORMAT( 1X, ' Row', I4, ':', 1X, 1P,2('(',E10.3,',',E10.3,') ') ) 9984 FORMAT( 1X, ' Row', I4, ':', 1X, 1P,1('(',E10.3,',',E10.3,') ') ) * diff --git a/Toolbox/arpack-src/cnaitr.f b/Toolbox/arpack-src/cnaitr.f index 46aaa03c4..3759760df 100644 --- a/Toolbox/arpack-src/cnaitr.f +++ b/Toolbox/arpack-src/cnaitr.f @@ -2,8 +2,8 @@ c c\Name: cnaitr c -c\Description: -c Reverse communication interface for applying NP additional steps to +c\Description: +c Reverse communication interface for applying NP additional steps to c a K step nonsymmetric Arnoldi factorization. c c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T @@ -19,7 +19,7 @@ c c\Usage: c call cnaitr -c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, +c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, c IPNTR, WORKD, INFO ) c c\Arguments @@ -61,8 +61,8 @@ c Number of additional Arnoldi steps to take. c c NB Integer. (INPUT) -c Blocksize to be used in the recurrence. -c Only work for NB = 1 right now. The goal is to have a +c Blocksize to be used in the recurrence. +c Only work for NB = 1 right now. The goal is to have a c program that implement both the block and non-block method. c c RESID Complex array of length N. (INPUT/OUTPUT) @@ -74,37 +74,37 @@ c B-norm of the updated residual r_{k+p} on output. c c V Complex N by K+NP array. (INPUT/OUTPUT) -c On INPUT: V contains the Arnoldi vectors in the first K +c On INPUT: V contains the Arnoldi vectors in the first K c columns. c On OUTPUT: V contains the new NP Arnoldi vectors in the next c NP columns. The first K columns are unchanged. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Complex (K+NP) by (K+NP) array. (INPUT/OUTPUT) c H is used to store the generated upper Hessenberg matrix. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORK for +c Pointer to mark the starting locations in the WORK for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Complex work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The calling program should not +c for reverse communication. The calling program should not c use WORKD as temporary workspace during the iteration !!!!!! -c On input, WORKD(1:N) = B*RESID and is used to save some +c On input, WORKD(1:N) = B*RESID and is used to save some c computation at the first step. c c INFO Integer. (OUTPUT) @@ -124,14 +124,14 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: c cgetv0 ARPACK routine to generate the initial vector. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c cmout ARPACK utility routine that prints matrices c cvout ARPACK utility routine that prints vectors. c clanhs LAPACK routine that computes various norms of a matrix. @@ -143,29 +143,29 @@ c cgemv Level 2 BLAS routine for matrix vector multiplication. c caxpy Level 1 BLAS that computes a vector triad. c ccopy Level 1 BLAS that copies one vector to another . -c cdotc Level 1 BLAS that computes the scalar product of two vectors. +c cdotc Level 1 BLAS that computes the scalar product of two vectors. c cscal Level 1 BLAS that scales a vector. -c csscal Level 1 BLAS that scales a complex vector by a real number. +c csscal Level 1 BLAS that scales a complex vector by a real number. c scnrm2 Level 1 BLAS that computes the norm of a vector. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas -c +c Dept. of Computational & Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas +c c\SCCS Information: @(#) c FILE: naitr.F SID: 2.3 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks c The algorithm implemented is: -c +c c restart = .false. -c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; +c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; c r_{k} contains the initial residual vector even for k = 0; -c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already +c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already c computed by the calling program. c c betaj = rnorm ; p_{k+1} = B*r_{k} ; @@ -173,7 +173,7 @@ c 1) if ( betaj < tol ) stop or restart depending on j. c ( At present tol is zero ) c if ( restart ) generate a new starting vector. -c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; +c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; c p_{j} = p_{j}/betaj c 3) r_{j} = OP*v_{j} where OP is defined as in cnaupd c For shift-invert mode p_{j} = B*v_{j} is already available. @@ -188,7 +188,7 @@ c 5) Re-orthogonalization step: c s = V_{j}'*B*r_{j} c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} || -c alphaj = alphaj + s_{j}; +c alphaj = alphaj + s_{j}; c 6) Iterative refinement step: c If (rnorm1 > 0.717*rnorm) then c rnorm = rnorm1 @@ -198,7 +198,7 @@ c If this is the first time in step 6), go to 5) c Else r_{j} lies in the span of V_{j} numerically. c Set r_{j} = 0 and rnorm = 0; go to 1) -c EndIf +c EndIf c End Do c c\EndLib @@ -206,15 +206,15 @@ c----------------------------------------------------------------------- c subroutine cnaitr - & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, + & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, & ipntr, workd, info) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -241,7 +241,7 @@ subroutine cnaitr & one, zero Real & rone, rzero - parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0), + parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0), & rone = 1.0E+0, rzero = 0.0E+0) c c %--------------% @@ -258,7 +258,7 @@ subroutine cnaitr logical first, orth1, orth2, rstart, step3, step4 integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl, & jj - Real + Real & ovfl, smlnum, tst1, ulp, unfl, betaj, & temp1, rnorm1, wnorm Complex @@ -272,24 +272,24 @@ subroutine cnaitr c | External Subroutines | c %----------------------% c - external caxpy, ccopy, cscal, csscal, cgemv, cgetv0, - & slabad, cvout, cmout, ivout, second + external caxpy, ccopy, cscal, csscal, cgemv, cgetv0, + & slabad, cvout, cmout, ivout, arscnd c c %--------------------% c | External Functions | c %--------------------% c Complex - & cdotc - Real + & ccdotc + Real & slamch, scnrm2, clanhs, slapy2 - external cdotc, scnrm2, clanhs, slamch, slapy2 + external ccdotc, scnrm2, clanhs, slamch, slapy2 c c %---------------------% c | Intrinsic Functions | c %---------------------% c - intrinsic aimag, real, max, sqrt + intrinsic aimag, real, max, sqrt c c %-----------------% c | Data statements | @@ -320,15 +320,15 @@ subroutine cnaitr end if c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mcaitr -c +c c %------------------------------% c | Initial call to this routine | c %------------------------------% @@ -344,7 +344,7 @@ subroutine cnaitr irj = ipj + n ivj = irj + n end if -c +c c %-------------------------------------------------% c | When in reverse communication mode one of: | c | STEP3, STEP4, ORTH1, ORTH2, RSTART | @@ -374,16 +374,16 @@ subroutine cnaitr c | | c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | c %--------------------------------------------------------------% - + 1000 continue c if (msglvl .gt. 1) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: generating Arnoldi vector number') - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_naitr: B-norm of the current residual is') end if -c +c c %---------------------------------------------------% c | STEP 1: Check if the B norm of j-th residual | c | vector is zero. Equivalent to determine whether | @@ -400,16 +400,16 @@ subroutine cnaitr c %---------------------------------------------------% c if (msglvl .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: ****** RESTART AT STEP ******') end if -c +c c %---------------------------------------------% c | ITRY is the loop variable that controls the | c | maximum amount of times that a restart is | c | attempted. NRSTRT is used by stat.h | c %---------------------------------------------% -c +c betaj = rzero nrstrt = nrstrt + 1 itry = 1 @@ -423,7 +423,7 @@ subroutine cnaitr c | RSTART = .true. flow returns here. | c %--------------------------------------% c - call cgetv0 (ido, bmat, itry, .false., n, j, v, ldv, + call cgetv0 (ido, bmat, itry, .false., n, j, v, ldv, & resid, rnorm, ipntr, workd, ierr) if (ido .ne. 99) go to 9000 if (ierr .lt. 0) then @@ -437,12 +437,12 @@ subroutine cnaitr c %------------------------------------------------% c info = j - 1 - call second (t1) + call arscnd (t1) tcaitr = tcaitr + (t1 - t0) ido = 99 go to 9000 end if -c +c 40 continue c c %---------------------------------------------------------% @@ -466,7 +466,7 @@ subroutine cnaitr c call clascl ('General', i, i, rnorm, rone, & n, 1, v(1,j), n, infol) - call clascl ('General', i, i, rnorm, rone, + call clascl ('General', i, i, rnorm, rone, & n, 1, workd(ipj), n, infol) end if c @@ -477,29 +477,29 @@ subroutine cnaitr c step3 = .true. nopx = nopx + 1 - call second (t2) + call arscnd (t2) call ccopy (n, v(1,j), 1, workd(ivj), 1) ipntr(1) = ivj ipntr(2) = irj ipntr(3) = ipj ido = 1 -c +c c %-----------------------------------% c | Exit in order to compute OP*v_{j} | c %-----------------------------------% -c - go to 9000 +c + go to 9000 50 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | c | if step3 = .true. | c %----------------------------------% c - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) - + step3 = .false. c c %------------------------------------------% @@ -507,30 +507,30 @@ subroutine cnaitr c %------------------------------------------% c call ccopy (n, workd(irj), 1, resid, 1) -c +c c %---------------------------------------% c | STEP 4: Finish extending the Arnoldi | c | factorization to length j. | c %---------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 step4 = .true. ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-------------------------------------% c | Exit in order to compute B*OP*v_{j} | c %-------------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call ccopy (n, resid, 1, workd(ipj), 1) end if 60 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | @@ -538,10 +538,10 @@ subroutine cnaitr c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c step4 = .false. c c %-------------------------------------% @@ -549,8 +549,8 @@ subroutine cnaitr c | Compute the B-norm of OP*v_{j}. | c %-------------------------------------% c - if (bmat .eq. 'G') then - cnorm = cdotc (n, resid, 1, workd(ipj), 1) + if (bmat .eq. 'G') then + cnorm = ccdotc (n, resid, 1, workd(ipj), 1) wnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) ) else if (bmat .eq. 'I') then wnorm = scnrm2(n, resid, 1) @@ -569,13 +569,13 @@ subroutine cnaitr c | Compute the j Fourier coefficients w_{j} | c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | c %------------------------------------------% -c +c call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1, & zero, h(1,j), 1) c c %--------------------------------------% c | Orthogonalize r_{j} against V_{j}. | -c | RESID contains OP*v_{j}. See STEP 3. | +c | RESID contains OP*v_{j}. See STEP 3. | c %--------------------------------------% c call cgemv ('N', n, j, -one, v, ldv, h(1,j), 1, @@ -583,51 +583,51 @@ subroutine cnaitr c if (j .gt. 1) h(j,j-1) = cmplx(betaj, rzero) c - call second (t4) -c + call arscnd (t4) +c orth1 = .true. -c - call second (t2) +c + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call ccopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*r_{j} | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call ccopy (n, resid, 1, workd(ipj), 1) - end if + end if 70 continue -c +c c %---------------------------------------------------% c | Back from reverse communication if ORTH1 = .true. | c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c orth1 = .false. c c %------------------------------% c | Compute the B-norm of r_{j}. | c %------------------------------% c - if (bmat .eq. 'G') then - cnorm = cdotc (n, resid, 1, workd(ipj), 1) + if (bmat .eq. 'G') then + cnorm = ccdotc (n, resid, 1, workd(ipj), 1) rnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) ) else if (bmat .eq. 'I') then rnorm = scnrm2(n, resid, 1) end if -c +c c %-----------------------------------------------------------% c | STEP 5: Re-orthogonalization / Iterative refinement phase | c | Maximum NITER_ITREF tries. | @@ -650,20 +650,20 @@ subroutine cnaitr c iter = 0 nrorth = nrorth + 1 -c +c c %---------------------------------------------------% c | Enter the Iterative refinement phase. If further | c | refinement is necessary, loop back here. The loop | c | variable is ITER. Perform a step of Classical | c | Gram-Schmidt using all the Arnoldi vectors V_{j} | c %---------------------------------------------------% -c +c 80 continue c if (msglvl .gt. 2) then rtemp(1) = wnorm rtemp(2) = rnorm - call svout (logfil, 2, rtemp, ndigit, + call svout (logfil, 2, rtemp, ndigit, & '_naitr: re-orthogonalization; wnorm and rnorm are') call cvout (logfil, j, h(1,j), ndigit, & '_naitr: j-th column of H') @@ -674,7 +674,7 @@ subroutine cnaitr c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | c %----------------------------------------------------% c - call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1, + call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1, & zero, workd(irj), 1) c c %---------------------------------------------% @@ -684,28 +684,28 @@ subroutine cnaitr c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | c %---------------------------------------------% c - call cgemv ('N', n, j, -one, v, ldv, workd(irj), 1, + call cgemv ('N', n, j, -one, v, ldv, workd(irj), 1, & one, resid, 1) call caxpy (j, one, workd(irj), 1, h(1,j), 1) -c +c orth2 = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call ccopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-----------------------------------% c | Exit in order to compute B*r_{j}. | c | r_{j} is the corrected residual. | c %-----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call ccopy (n, resid, 1, workd(ipj), 1) - end if + end if 90 continue c c %---------------------------------------------------% @@ -713,23 +713,23 @@ subroutine cnaitr c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) - end if + end if c c %-----------------------------------------------------% c | Compute the B-norm of the corrected residual r_{j}. | c %-----------------------------------------------------% -c - if (bmat .eq. 'G') then - cnorm = cdotc (n, resid, 1, workd(ipj), 1) +c + if (bmat .eq. 'G') then + cnorm = ccdotc (n, resid, 1, workd(ipj), 1) rnorm1 = sqrt( slapy2(real(cnorm),aimag(cnorm)) ) else if (bmat .eq. 'I') then rnorm1 = scnrm2(n, resid, 1) end if -c +c if (msglvl .gt. 0 .and. iter .gt. 0 ) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: Iterative refinement for Arnoldi residual') if (msglvl .gt. 2) then rtemp(1) = rnorm @@ -757,7 +757,7 @@ subroutine cnaitr c %---------------------------------------% c rnorm = rnorm1 -c +c else c c %-------------------------------------------% @@ -776,55 +776,55 @@ subroutine cnaitr c do 95 jj = 1, n resid(jj) = zero - 95 continue + 95 continue rnorm = rzero end if -c +c c %----------------------------------------------% c | Branch here directly if iterative refinement | c | wasn't necessary or after at most NITER_REF | c | steps of iterative refinement. | c %----------------------------------------------% -c +c 100 continue -c +c rstart = .false. orth2 = .false. -c - call second (t5) +c + call arscnd (t5) titref = titref + (t5 - t4) -c +c c %------------------------------------% c | STEP 6: Update j = j+1; Continue | c %------------------------------------% c j = j + 1 if (j .gt. k+np) then - call second (t1) + call arscnd (t1) tcaitr = tcaitr + (t1 - t0) ido = 99 do 110 i = max(1,k), k+np-1 -c +c c %--------------------------------------------% c | Check for splitting and deflation. | c | Use a standard test as in the QR algorithm | c | REFERENCE: LAPACK subroutine clahqr | c %--------------------------------------------% -c +c tst1 = slapy2(real(h(i,i)),aimag(h(i,i))) & + slapy2(real(h(i+1,i+1)), aimag(h(i+1,i+1))) if( tst1.eq.real(zero) ) & tst1 = clanhs( '1', k+np, h, ldh, workd(n+1) ) - if( slapy2(real(h(i+1,i)),aimag(h(i+1,i))) .le. - & max( ulp*tst1, smlnum ) ) + if( slapy2(real(h(i+1,i)),aimag(h(i+1,i))) .le. + & max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 110 continue -c +c if (msglvl .gt. 2) then - call cmout (logfil, k+np, k+np, h, ldh, ndigit, + call cmout (logfil, k+np, k+np, h, ldh, ndigit, & '_naitr: Final upper Hessenberg matrix H of order K+NP') end if -c +c go to 9000 end if c @@ -833,7 +833,7 @@ subroutine cnaitr c %--------------------------------------------------------% c go to 1000 -c +c c %---------------------------------------------------------------% c | | c | E N D O F M A I N I T E R A T I O N L O O P | diff --git a/Toolbox/arpack-src/cnapps.f b/Toolbox/arpack-src/cnapps.f index 547926f16..c3a55623f 100644 --- a/Toolbox/arpack-src/cnapps.f +++ b/Toolbox/arpack-src/cnapps.f @@ -19,7 +19,7 @@ c c\Usage: c call cnapps -c ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, +c ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, c WORKL, WORKD ) c c\Arguments @@ -28,7 +28,7 @@ c c KEV Integer. (INPUT/OUTPUT) c KEV+NP is the size of the input matrix H. -c KEV is the size of the updated matrix HNEW. +c KEV is the size of the updated matrix HNEW. c c NP Integer. (INPUT) c Number of implicit shifts to be applied. @@ -46,7 +46,7 @@ c program. c c H Complex (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT) -c On INPUT, H contains the current KEV+NP by KEV+NP upper +c On INPUT, H contains the current KEV+NP by KEV+NP upper c Hessenberg matrix of the Arnoldi factorization. c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg c matrix in the KEV leading submatrix. @@ -57,7 +57,7 @@ c c RESID Complex array of length N. (INPUT/OUTPUT) c On INPUT, RESID contains the the residual vector r_{k+p}. -c On OUTPUT, RESID is the update residual vector rnew_{k} +c On OUTPUT, RESID is the update residual vector rnew_{k} c in the first KEV locations. c c Q Complex KEV+NP by KEV+NP work array. (WORKSPACE) @@ -92,7 +92,7 @@ c c\Routines called: c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c cmout ARPACK utility routine that prints matrices c cvout ARPACK utility routine that prints vectors. c clacpy LAPACK matrix copy routine. @@ -112,9 +112,9 @@ c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: napps.F SID: 2.3 DATE OF SID: 3/28/97 RELEASE: 2 @@ -132,15 +132,15 @@ c----------------------------------------------------------------------- c subroutine cnapps - & ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, + & ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, & workl, workd ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -153,7 +153,7 @@ subroutine cnapps c %-----------------% c Complex - & h(ldh,kev+np), resid(n), shift(np), + & h(ldh,kev+np), resid(n), shift(np), & v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np) c c %------------% @@ -175,22 +175,22 @@ subroutine cnapps logical first Complex & cdum, f, g, h11, h21, r, s, sigma, t - Real + Real & c, ovfl, smlnum, ulp, unfl, tst1 - save first, ovfl, smlnum, ulp, unfl + save first, ovfl, smlnum, ulp, unfl c c %----------------------% c | External Subroutines | c %----------------------% c - external caxpy, ccopy, cgemv, cscal, clacpy, clartg, - & cvout, claset, slabad, cmout, second, ivout + external caxpy, ccopy, cgemv, cscal, clacpy, clartg, + & cvout, claset, slabad, cmout, arscnd, ivout c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & clanhs, slamch, slapy2 external clanhs, slamch, slapy2 c @@ -204,12 +204,12 @@ subroutine cnapps c | Statement Functions | c %---------------------% c - Real + Real & cabs1 cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) ) c c %----------------% -c | Data statments | +c | Data statements | c %----------------% c data first / .true. / @@ -240,11 +240,11 @@ subroutine cnapps c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mcapps -c - kplusp = kev + np -c +c + kplusp = kev + np +c c %--------------------------------------------% c | Initialize Q to the identity to accumulate | c | the rotations and reflections | @@ -268,9 +268,9 @@ subroutine cnapps sigma = shift(jj) c if (msglvl .gt. 2 ) then - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: shift number.') - call cvout (logfil, 1, sigma, ndigit, + call cvout (logfil, 1, [sigma], ndigit, & '_napps: Value of the shift ') end if c @@ -288,14 +288,14 @@ subroutine cnapps tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) ) if( tst1.eq.rzero ) & tst1 = clanhs( '1', kplusp-jj+1, h, ldh, workl ) - if ( abs(real(h(i+1,i))) + if ( abs(real(h(i+1,i))) & .le. max(ulp*tst1, smlnum) ) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_napps: matrix splitting at row/column no.') - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: matrix splitting with shift number.') - call cvout (logfil, 1, h(i+1,i), ndigit, + call cvout (logfil, 1, h(i+1,i), ndigit, & '_napps: off diagonal element.') end if iend = i @@ -307,9 +307,9 @@ subroutine cnapps 40 continue c if (msglvl .gt. 2) then - call ivout (logfil, 1, istart, ndigit, + call ivout (logfil, 1, [istart], ndigit, & '_napps: Start of current block ') - call ivout (logfil, 1, iend, ndigit, + call ivout (logfil, 1, [iend], ndigit, & '_napps: End of current block ') end if c @@ -325,7 +325,7 @@ subroutine cnapps h21 = h(istart+1,istart) f = h11 - sigma g = h21 -c +c do 80 i = istart, iend-1 c c %------------------------------------------------------% @@ -345,7 +345,7 @@ subroutine cnapps do 50 j = i, kplusp t = c*h(i,j) + s*h(i+1,j) h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j) - h(i,j) = t + h(i,j) = t 50 continue c c %---------------------------------------------% @@ -355,7 +355,7 @@ subroutine cnapps do 60 j = 1, min(i+2,iend) t = c*h(j,i) + conjg(s)*h(j,i+1) h(j,i+1) = -s*h(j,i) + c*h(j,i+1) - h(j,i) = t + h(j,i) = t 60 continue c c %-----------------------------------------------------% @@ -365,7 +365,7 @@ subroutine cnapps do 70 j = 1, min(i+jj, kplusp) t = c*q(j,i) + conjg(s)*q(j,i+1) q(j,i+1) = - s*q(j,i) + c*q(j,i+1) - q(j,i) = t + q(j,i) = t 70 continue c c %---------------------------% @@ -381,7 +381,7 @@ subroutine cnapps c %-------------------------------% c | Finished applying the shift. | c %-------------------------------% -c +c 100 continue c c %---------------------------------------------------------% @@ -428,7 +428,7 @@ subroutine cnapps tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) ) if( tst1 .eq. rzero ) & tst1 = clanhs( '1', kev, h, ldh, workl ) - if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) ) + if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 130 continue c @@ -441,9 +441,9 @@ subroutine cnapps c %-------------------------------------------------% c if ( real( h(kev+1,kev) ) .gt. rzero ) - & call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, + & call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, & workd(n+1), 1) -c +c c %----------------------------------------------------------% c | Compute column 1 to kev of (V*Q) in backward order | c | taking advantage of the upper Hessenberg structure of Q. | @@ -460,14 +460,14 @@ subroutine cnapps c %-------------------------------------------------% c call clacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv) -c +c c %--------------------------------------------------------------% c | Copy the (kev+1)-st column of (V*Q) in the appropriate place | c %--------------------------------------------------------------% c if ( real( h(kev+1,kev) ) .gt. rzero ) & call ccopy (n, workd(n+1), 1, v(1,kev+1), 1) -c +c c %-------------------------------------% c | Update the residual vector: | c | r <- sigmak*r + betak*v(:,kev+1) | @@ -485,7 +485,7 @@ subroutine cnapps & '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}') call cvout (logfil, 1, h(kev+1,kev), ndigit, & '_napps: betak = e_{kev+1}^T*H*e_{kev}') - call ivout (logfil, 1, kev, ndigit, + call ivout (logfil, 1, [kev], ndigit, & '_napps: Order of the final Hessenberg matrix ') if (msglvl .gt. 2) then call cmout (logfil, kev, kev, h, ldh, ndigit, @@ -495,9 +495,9 @@ subroutine cnapps end if c 9000 continue - call second (t1) + call arscnd (t1) tcapps = tcapps + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/cnaup2.f b/Toolbox/arpack-src/cnaup2.f index 44b133576..e36154247 100644 --- a/Toolbox/arpack-src/cnaup2.f +++ b/Toolbox/arpack-src/cnaup2.f @@ -2,13 +2,13 @@ c c\Name: cnaup2 c -c\Description: +c\Description: c Intermediate level interface called by cnaupd. c c\Usage: c call cnaup2 c ( IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD, -c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, +c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, c Q, LDQ, WORKL, IPNTR, WORKD, RWORK, INFO ) c c\Arguments @@ -26,7 +26,7 @@ c The logic for adjusting is contained within the current c subroutine. c If ISHIFT=0, NP is the number of shifts the user needs -c to provide via reverse comunication. 0 < NP < NCV-NEV. +c to provide via reverse communication. 0 < NP < NCV-NEV. c NP may be less than NCV-NEV since a leading block of the current c upper Hessenberg matrix has split off and contains "unwanted" c Ritz values. @@ -38,27 +38,27 @@ c IUPD .NE. 0: use implicit update. c c V Complex N by (NEV+NP) array. (INPUT/OUTPUT) -c The Arnoldi basis vectors are returned in the first NEV +c The Arnoldi basis vectors are returned in the first NEV c columns of V. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Complex (NEV+NP) by (NEV+NP) array. (OUTPUT) c H is used to store the generated upper Hessenberg matrix c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c RITZ Complex array of length NEV+NP. (OUTPUT) c RITZ(1:NEV) contains the computed Ritz values of OP. c c BOUNDS Complex array of length NEV+NP. (OUTPUT) -c BOUNDS(1:NEV) contain the error bounds corresponding to +c BOUNDS(1:NEV) contain the error bounds corresponding to c the computed Ritz values. -c +c c Q Complex (NEV+NP) by (NEV+NP) array. (WORKSPACE) c Private (replicated) work array used to accumulate the c rotation in the shift application step. @@ -67,7 +67,7 @@ c Leading dimension of Q exactly as declared in the calling c program. c -c WORKL Complex work array of length at least +c WORKL Complex work array of length at least c (NEV+NP)**2 + 3*(NEV+NP). (WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. It is used in shifts calculation, shifts @@ -75,15 +75,15 @@ c c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORKD for +c Pointer to mark the starting locations in the WORKD for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Complex work array of length 3*N. (WORKSPACE) c Distributed array to be used in the basic Arnoldi iteration c for reverse communication. The user should not use WORKD @@ -101,7 +101,7 @@ c Error flag on output. c = 0: Normal return. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. +c All possible eigenvalues of OP has been found. c NP returns the number of converged Ritz values. c = 2: No shifts could be applied. c = -8: Error return from LAPACK eigenvalue calculation; @@ -117,32 +117,32 @@ c\BeginLib c c\Local variables: -c xxxxxx Complex +c xxxxxx Complex c c\References: c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c cgetv0 ARPACK initial vector generation routine. +c cgetv0 ARPACK initial vector generation routine. c cnaitr ARPACK Arnoldi factorization routine. c cnapps ARPACK application of implicit shifts routine. -c cneigh ARPACK compute Ritz values and error bounds routine. +c cneigh ARPACK compute Ritz values and error bounds routine. c cngets ARPACK reorder Ritz values and error bounds routine. c csortc ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c cmout ARPACK utility routine that prints matrices c cvout ARPACK utility routine that prints vectors. c svout ARPACK utility routine that prints vectors. c slamch LAPACK routine that determines machine constants. c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c ccopy Level 1 BLAS that copies one vector to another . -c cdotc Level 1 BLAS that computes the scalar product of two vectors. +c cdotc Level 1 BLAS that computes the scalar product of two vectors. c cswap Level 1 BLAS that swaps two vectors. c scnrm2 Level 1 BLAS that computes the norm of a vector. c @@ -151,10 +151,10 @@ c Richard Lehoucq CRPC / Rice Universitya c Chao Yang Houston, Texas c Dept. of Computational & -c Applied Mathematics -c Rice University -c Houston, Texas -c +c Applied Mathematics +c Rice University +c Houston, Texas +c c\SCCS Information: @(#) c FILE: naup2.F SID: 2.6 DATE OF SID: 06/01/00 RELEASE: 2 c @@ -166,16 +166,16 @@ c----------------------------------------------------------------------- c subroutine cnaup2 - & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, + & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, + & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, & q, ldq, workl, ipntr, workd, rwork, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -184,7 +184,7 @@ subroutine cnaup2 character bmat*1, which*2 integer ido, info, ishift, iupd, mode, ldh, ldq, ldv, mxiter, & n, nev, np - Real + Real & tol c c %-----------------% @@ -192,20 +192,20 @@ subroutine cnaup2 c %-----------------% c integer ipntr(13) - Complex - & bounds(nev+np), h(ldh,nev+np), q(ldq,nev+np), - & resid(n), ritz(nev+np), v(ldv,nev+np), + Complex + & bounds(nev+np), h(ldh,nev+np), q(ldq,nev+np), + & resid(n), ritz(nev+np), v(ldv,nev+np), & workd(3*n), workl( (nev+np)*(nev+np+3) ) - Real + Real & rwork(nev+np) c c %------------% c | Parameters | c %------------% c - Complex + Complex & one, zero - Real + Real & rzero parameter (one = (1.0E+0, 0.0E+0) , zero = (0.0E+0, 0.0E+0) , & rzero = 0.0E+0 ) @@ -215,16 +215,16 @@ subroutine cnaup2 c %---------------% c logical cnorm , getv0, initv , update, ushift - integer ierr , iter , kplusp, msglvl, nconv, + integer ierr , iter , kplusp, msglvl, nconv, & nevbef, nev0 , np0 , nptemp, i , - & j - Complex + & j + Complex & cmpnorm - Real + Real & rnorm , eps23, rtemp character wprime*2 c - save cnorm, getv0, initv , update, ushift, + save cnorm, getv0, initv , update, ushift, & rnorm, iter , kplusp, msglvl, nconv , & nevbef, nev0 , np0 , eps23 c @@ -240,17 +240,17 @@ subroutine cnaup2 c %----------------------% c external ccopy, cgetv0, cnaitr, cneigh, cngets, cnapps, - & csortc, cswap, cmout, cvout, ivout, second + & csortc, cswap, cmout, cvout, ivout, arscnd c c %--------------------% c | External functions | c %--------------------% c - Complex - & cdotc - Real + Complex + & ccdotc + Real & scnrm2, slamch, slapy2 - external cdotc, scnrm2, slamch, slapy2 + external ccdotc, scnrm2, slamch, slapy2 c c %---------------------% c | Intrinsic Functions | @@ -263,11 +263,11 @@ subroutine cnaup2 c %-----------------------% c if (ido .eq. 0) then -c - call second (t0) -c +c + call arscnd (t0) +c msglvl = mcaup2 -c +c nev0 = nev np0 = np c @@ -283,7 +283,7 @@ subroutine cnaup2 kplusp = nev + np nconv = 0 iter = 0 -c +c c %---------------------------------% c | Get machine dependent constant. | c %---------------------------------% @@ -313,7 +313,7 @@ subroutine cnaup2 initv = .false. end if end if -c +c c %---------------------------------------------% c | Get a possibly random starting vector and | c | force it into the range of the operator OP. | @@ -330,7 +330,7 @@ subroutine cnaup2 if (rnorm .eq. rzero) then c c %-----------------------------------------% -c | The initial vector is zero. Error exit. | +c | The initial vector is zero. Error exit. | c %-----------------------------------------% c info = -9 @@ -339,7 +339,7 @@ subroutine cnaup2 getv0 = .false. ido = 0 end if -c +c c %-----------------------------------% c | Back from reverse communication : | c | continue with update step | @@ -359,12 +359,12 @@ subroutine cnaup2 c %-------------------------------------% c if (cnorm) go to 100 -c +c c %----------------------------------------------------------% c | Compute the first NEV steps of the Arnoldi factorization | c %----------------------------------------------------------% c - call cnaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, + call cnaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, & h, ldh, ipntr, workd, info) c if (ido .ne. 99) go to 9000 @@ -375,7 +375,7 @@ subroutine cnaup2 info = -9999 go to 1200 end if -c +c c %--------------------------------------------------------------% c | | c | M A I N ARNOLDI I T E R A T I O N L O O P | @@ -383,16 +383,16 @@ subroutine cnaup2 c | factorization in place. | c | | c %--------------------------------------------------------------% -c +c 1000 continue c iter = iter + 1 c if (msglvl .gt. 0) then - call ivout (logfil, 1, iter, ndigit, + call ivout (logfil, 1, [iter], ndigit, & '_naup2: **** Start of major iteration number ****') end if -c +c c %-----------------------------------------------------------% c | Compute NP additional steps of the Arnoldi factorization. | c | Adjust NP since NEV might have been updated by last call | @@ -402,9 +402,9 @@ subroutine cnaup2 np = kplusp - nev c if (msglvl .gt. 1) then - call ivout (logfil, 1, nev, ndigit, + call ivout (logfil, 1, [nev], ndigit, & '_naup2: The length of the current Arnoldi factorization') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naup2: Extend the Arnoldi factorization by') end if c @@ -430,10 +430,10 @@ subroutine cnaup2 update = .false. c if (msglvl .gt. 1) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_naup2: Corresponding B-norm of the residual') end if -c +c c %--------------------------------------------------------% c | Compute the eigenvalues and corresponding error bounds | c | of the current upper Hessenberg matrix. | @@ -452,7 +452,7 @@ subroutine cnaup2 c | to be used in the convergence test. | c | The wanted part of the spectrum and corresponding | c | error bounds are in the last NEV loc. of RITZ, | -c | and BOUNDS respectively. | +c | and BOUNDS respectively. | c %---------------------------------------------------% c nev = nev0 @@ -475,7 +475,7 @@ subroutine cnaup2 c %---------------------------------------------------% c call cngets (ishift, which, nev, np, ritz, bounds) -c +c c %------------------------------------------------------------% c | Convergence test: currently we use the following criteria. | c | The relative accuracy of a Ritz value is considered | @@ -489,22 +489,22 @@ subroutine cnaup2 c do 25 i = 1, nev rtemp = max( eps23, slapy2( real (ritz(np+i)), - & aimag(ritz(np+i)) ) ) - if ( slapy2(real (bounds(np+i)),aimag(bounds(np+i))) + & aimag(ritz(np+i)) ) ) + if ( slapy2(real (bounds(np+i)),aimag(bounds(np+i))) & .le. tol*rtemp ) then nconv = nconv + 1 end if 25 continue -c +c if (msglvl .gt. 2) then kp(1) = nev kp(2) = np kp(3) = nconv - call ivout (logfil, 3, kp, ndigit, + call ivout (logfil, 3, kp, ndigit, & '_naup2: NEV, NP, NCONV are') call cvout (logfil, kplusp, ritz, ndigit, & '_naup2: The eigenvalues of H') - call cvout (logfil, kplusp, bounds, ndigit, + call cvout (logfil, kplusp, bounds, ndigit, & '_naup2: Ritz estimates of the current NCV Ritz values') end if c @@ -525,8 +525,8 @@ subroutine cnaup2 nev = nev + 1 end if 30 continue -c - if ( (nconv .ge. nev0) .or. +c + if ( (nconv .ge. nev0) .or. & (iter .gt. mxiter) .or. & (np .eq. 0) ) then c @@ -537,7 +537,7 @@ subroutine cnaup2 & ndigit, & '_naup2: Ritz estimates computed by _neigh:') end if -c +c c %------------------------------------------------% c | Prepare to exit. Put the converged Ritz values | c | and corresponding bounds in RITZ(1:NCONV) and | @@ -573,7 +573,7 @@ subroutine cnaup2 c | by 1 / max(eps23, magnitude of the Ritz value). | c %--------------------------------------------------% c - do 35 j = 1, nev0 + do 35 j = 1, nev0 rtemp = max( eps23, slapy2( real (ritz(j)), & aimag(ritz(j)) ) ) bounds(j) = bounds(j)/rtemp @@ -616,13 +616,13 @@ subroutine cnaup2 end if c c %------------------------------------% -c | Max iterations have been exceeded. | +c | Max iterations have been exceeded. | c %------------------------------------% c if (iter .gt. mxiter .and. nconv .lt. nev0) info = 1 c c %---------------------% -c | No shifts to apply. | +c | No shifts to apply. | c %---------------------% c if (np .eq. 0 .and. nconv .lt. nev0) info = 2 @@ -631,7 +631,7 @@ subroutine cnaup2 go to 1100 c else if ( (nconv .lt. nev0) .and. (ishift .eq. 1) ) then -c +c c %-------------------------------------------------% c | Do not have all the requested eigenvalues yet. | c | To prevent possible stagnation, adjust the size | @@ -646,24 +646,24 @@ subroutine cnaup2 nev = 2 end if np = kplusp - nev -c +c c %---------------------------------------% c | If the size of NEV was just increased | c | resort the eigenvalues. | c %---------------------------------------% -c - if (nevbef .lt. nev) +c + if (nevbef .lt. nev) & call cngets (ishift, which, nev, np, ritz, bounds) c - end if -c + end if +c if (msglvl .gt. 0) then - call ivout (logfil, 1, nconv, ndigit, + call ivout (logfil, 1, [nconv], ndigit, & '_naup2: no. of "converged" Ritz values at this iter.') if (msglvl .gt. 1) then kp(1) = nev kp(2) = np - call ivout (logfil, 2, kp, ndigit, + call ivout (logfil, 2, kp, ndigit, & '_naup2: NEV and NP are') call cvout (logfil, nev, ritz(np+1), ndigit, & '_naup2: "wanted" Ritz values ') @@ -687,7 +687,7 @@ subroutine cnaup2 ushift = .false. c if ( ishift .ne. 1 ) then -c +c c %----------------------------------% c | Move the NP shifts from WORKL to | c | RITZ, to free up WORKL | @@ -697,12 +697,12 @@ subroutine cnaup2 call ccopy (np, workl, 1, ritz, 1) end if c - if (msglvl .gt. 2) then - call ivout (logfil, 1, np, ndigit, + if (msglvl .gt. 2) then + call ivout (logfil, 1, [np], ndigit, & '_naup2: The number of shifts to apply ') call cvout (logfil, np, ritz, ndigit, & '_naup2: values of the shifts') - if ( ishift .eq. 1 ) + if ( ishift .eq. 1 ) & call cvout (logfil, np, bounds, ndigit, & '_naup2: Ritz estimates of the shifts') end if @@ -714,7 +714,7 @@ subroutine cnaup2 c | The first 2*N locations of WORKD are used as workspace. | c %---------------------------------------------------------% c - call cnapps (n, nev, np, ritz, v, ldv, + call cnapps (n, nev, np, ritz, v, ldv, & h, ldh, resid, q, ldq, workl, workd) c c %---------------------------------------------% @@ -724,37 +724,37 @@ subroutine cnaup2 c %---------------------------------------------% c cnorm = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call ccopy (n, resid, 1, workd(n+1), 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*RESID | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call ccopy (n, resid, 1, workd, 1) end if -c +c 100 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(1:N) := B*RESID | c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c - if (bmat .eq. 'G') then - cmpnorm = cdotc (n, resid, 1, workd, 1) +c + if (bmat .eq. 'G') then + cmpnorm = ccdotc (n, resid, 1, workd, 1) rnorm = sqrt(slapy2(real (cmpnorm),aimag(cmpnorm))) else if (bmat .eq. 'I') then rnorm = scnrm2(n, resid, 1) @@ -762,12 +762,12 @@ subroutine cnaup2 cnorm = .false. c if (msglvl .gt. 2) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_naup2: B-norm of residual for compressed factorization') call cmout (logfil, nev, nev, h, ldh, ndigit, & '_naup2: Compressed upper Hessenberg matrix H') end if -c +c go to 1000 c c %---------------------------------------------------------------% @@ -780,7 +780,7 @@ subroutine cnaup2 c mxiter = iter nev = nconv -c +c 1200 continue ido = 99 c @@ -788,9 +788,9 @@ subroutine cnaup2 c | Error Exit | c %------------% c - call second (t1) + call arscnd (t1) tcaup2 = t1 - t0 -c +c 9000 continue c c %---------------% diff --git a/Toolbox/arpack-src/cnaupd.f b/Toolbox/arpack-src/cnaupd.f index de1a761cc..57be328bf 100644 --- a/Toolbox/arpack-src/cnaupd.f +++ b/Toolbox/arpack-src/cnaupd.f @@ -2,11 +2,11 @@ c c\Name: cnaupd c -c\Description: +c\Description: c Reverse communication interface for the Implicitly Restarted Arnoldi -c iteration. This is intended to be used to find a few eigenpairs of a -c complex linear operator OP with respect to a semi-inner product defined -c by a hermitian positive semi-definite real matrix B. B may be the identity +c iteration. This is intended to be used to find a few eigenpairs of a +c complex linear operator OP with respect to a semi-inner product defined +c by a hermitian positive semi-definite real matrix B. B may be the identity c matrix. NOTE: if both OP and B are real, then ssaupd or snaupd should c be used. c @@ -14,7 +14,7 @@ c The computed approximate eigenvalues are called Ritz values and c the corresponding approximate eigenvectors are called Ritz vectors. c -c cnaupd is usually called iteratively to solve one of the +c cnaupd is usually called iteratively to solve one of the c following problems: c c Mode 1: A*x = lambda*x. @@ -25,10 +25,10 @@ c ===> (If M can be factored see remark 3 below) c c Mode 3: A*x = lambda*M*x, M hermitian semi-definite -c ===> OP = inv[A - sigma*M]*M and B = M. -c ===> shift-and-invert mode +c ===> OP = inv[A - sigma*M]*M and B = M. +c ===> shift-and-invert mode c If OP*x = amu*x, then lambda = sigma + 1/amu. -c +c c c NOTE: The action of w <- inv[A - sigma*M]*v or w <- inv[M]*v c should be accomplished either by a direct method @@ -49,7 +49,7 @@ c c\Arguments c IDO Integer. (INPUT/OUTPUT) -c Reverse communication flag. IDO must be zero on the first +c Reverse communication flag. IDO must be zero on the first c call to cnaupd. IDO will be set internally to c indicate the type of operation to be performed. Control is c then given back to the calling routine which has the @@ -72,14 +72,14 @@ c IDO = 2: compute Y = M * X where c IPNTR(1) is the pointer into WORKD for X, c IPNTR(2) is the pointer into WORKD for Y. -c IDO = 3: compute and return the shifts in the first +c IDO = 3: compute and return the shifts in the first c NP locations of WORKL. c IDO = 99: done c ------------------------------------------------------------- -c After the initialization phase, when the routine is used in -c the "shift-and-invert" mode, the vector M * X is already +c After the initialization phase, when the routine is used in +c the "shift-and-invert" mode, the vector M * X is already c available and does not need to be recomputed in forming OP*X. -c +c c BMAT Character*1. (INPUT) c BMAT specifies the type of the matrix B that defines the c semi-inner product for the operator OP. @@ -100,15 +100,15 @@ c NEV Integer. (INPUT) c Number of eigenvalues of OP to be computed. 0 < NEV < N-1. c -c TOL Real scalar. (INPUT) -c Stopping criteria: the relative accuracy of the Ritz value +c TOL Real scalar. (INPUT) +c Stopping criteria: the relative accuracy of the Ritz value c is considered acceptable if BOUNDS(I) .LE. TOL*ABS(RITZ(I)) c where ABS(RITZ(I)) is the magnitude when RITZ(I) is complex. c DEFAULT = slamch('EPS') (machine precision as computed c by the LAPACK auxiliary subroutine slamch). c -c RESID Complex array of length N. (INPUT/OUTPUT) -c On INPUT: +c RESID Complex array of length N. (INPUT/OUTPUT) +c On INPUT: c If INFO .EQ. 0, a random initial residual vector is used. c If INFO .NE. 0, RESID contains the initial residual vector, c possibly from a previous run. @@ -118,15 +118,15 @@ c NCV Integer. (INPUT) c Number of columns of the matrix V. NCV must satisfy the two c inequalities 1 <= NCV-NEV and NCV <= N. -c This will indicate how many Arnoldi vectors are generated -c at each iteration. After the startup phase in which NEV -c Arnoldi vectors are generated, the algorithm generates -c approximately NCV-NEV Arnoldi vectors at each subsequent update -c iteration. Most of the cost in generating each Arnoldi vector is +c This will indicate how many Arnoldi vectors are generated +c at each iteration. After the startup phase in which NEV +c Arnoldi vectors are generated, the algorithm generates +c approximately NCV-NEV Arnoldi vectors at each subsequent update +c iteration. Most of the cost in generating each Arnoldi vector is c in the matrix-vector operation OP*x. (See remark 4 below.) c -c V Complex array N by NCV. (OUTPUT) -c Contains the final set of Arnoldi basis vectors. +c V Complex array N by NCV. (OUTPUT) +c Contains the final set of Arnoldi basis vectors. c c LDV Integer. (INPUT) c Leading dimension of V exactly as declared in the calling program. @@ -137,23 +137,23 @@ c the components of the unwanted eigenvector. c ------------------------------------------------------------- c ISHIFT = 0: the shifts are to be provided by the user via -c reverse communication. The NCV eigenvalues of +c reverse communication. The NCV eigenvalues of c the Hessenberg matrix H are returned in the part c of WORKL array corresponding to RITZ. c ISHIFT = 1: exact shifts with respect to the current -c Hessenberg matrix H. This is equivalent to -c restarting the iteration from the beginning +c Hessenberg matrix H. This is equivalent to +c restarting the iteration from the beginning c after updating the starting vector with a linear -c combination of Ritz vectors associated with the +c combination of Ritz vectors associated with the c "wanted" eigenvalues. c ISHIFT = 2: other choice of internal shift to be defined. c ------------------------------------------------------------- c -c IPARAM(2) = No longer referenced +c IPARAM(2) = No longer referenced c c IPARAM(3) = MXITER -c On INPUT: maximum number of Arnoldi update iterations allowed. -c On OUTPUT: actual number of Arnoldi update iterations taken. +c On INPUT: maximum number of Arnoldi update iterations allowed. +c On OUTPUT: actual number of Arnoldi update iterations taken. c c IPARAM(4) = NB: blocksize to be used in the recurrence. c The code currently works only for NB = 1. @@ -163,11 +163,11 @@ c the convergence criterion. c c IPARAM(6) = IUPD -c No longer referenced. Implicit restarting is ALWAYS used. +c No longer referenced. Implicit restarting is ALWAYS used. c c IPARAM(7) = MODE c On INPUT determines what type of eigenproblem is being solved. -c Must be 1,2,3; See under \Description of cnaupd for the +c Must be 1,2,3; See under \Description of cnaupd for the c four modes available. c c IPARAM(8) = NP @@ -186,7 +186,7 @@ c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X in WORKD. c IPNTR(2): pointer to the current result vector Y in WORKD. -c IPNTR(3): pointer to the vector B * X in WORKD when used in +c IPNTR(3): pointer to the vector B * X in WORKD when used in c the shift-and-invert mode. c IPNTR(4): pointer to the next available location in WORKL c that is untouched by the program. @@ -199,7 +199,7 @@ c c Note: IPNTR(9:13) is only referenced by cneupd. See Remark 2 below. c -c IPNTR(9): pointer to the NCV RITZ values of the +c IPNTR(9): pointer to the NCV RITZ values of the c original system. c IPNTR(10): Not Used c IPNTR(11): pointer to the NCV corresponding error bounds. @@ -210,21 +210,21 @@ c cneupd if RVEC = .TRUE. See Remark 2 below. c c ------------------------------------------------------------- -c -c WORKD Complex work array of length 3*N. (REVERSE COMMUNICATION) +c +c WORKD Complex work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The user should not use WORKD +c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration !!!!!!!!!! -c See Data Distribution Note below. +c See Data Distribution Note below. c -c WORKL Complex work array of length LWORKL. (OUTPUT/WORKSPACE) +c WORKL Complex work array of length LWORKL. (OUTPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. See Data Distribution Note below. c c LWORKL Integer. (INPUT) c LWORKL must be at least 3*NCV**2 + 5*NCV. c -c RWORK Real work array of length NCV (WORKSPACE) +c RWORK Real work array of length NCV (WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. c @@ -236,18 +236,18 @@ c Error flag on output. c = 0: Normal exit. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. IPARAM(5) +c All possible eigenvalues of OP has been found. IPARAM(5) c returns the number of wanted converged Ritz values. c = 2: No longer an informational error. Deprecated starting c with release 2 of ARPACK. -c = 3: No shifts could be applied during a cycle of the -c Implicitly restarted Arnoldi iteration. One possibility -c is to increase the size of NCV relative to NEV. +c = 3: No shifts could be applied during a cycle of the +c Implicitly restarted Arnoldi iteration. One possibility +c is to increase the size of NCV relative to NEV. c See remark 4 below. c = -1: N must be positive. c = -2: NEV must be positive. -c = -3: NCV-NEV >= 1 and less than or equal to N. -c = -4: The maximum number of Arnoldi update iteration +c = -3: NCV-NEV >= 2 and less than or equal to N. +c = -4: The maximum number of Arnoldi update iteration c must be greater than zero. c = -5: WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI' c = -6: BMAT must be one of 'I' or 'G'. @@ -268,16 +268,16 @@ c selection of WHICH should be made with this in mind when using c Mode = 3. When operating in Mode = 3 setting WHICH = 'LM' will c compute the NEV eigenvalues of the original problem that are -c closest to the shift SIGMA . After convergence, approximate eigenvalues +c closest to the shift SIGMA . After convergence, approximate eigenvalues c of the original problem may be obtained with the ARPACK subroutine cneupd. c -c 2. If a basis for the invariant subspace corresponding to the converged Ritz -c values is needed, the user must call cneupd immediately following +c 2. If a basis for the invariant subspace corresponding to the converged Ritz +c values is needed, the user must call cneupd immediately following c completion of cnaupd. This is new starting with release 2 of ARPACK. c c 3. If M can be factored into a Cholesky factorization M = LL` c then Mode = 2 should not be selected. Instead one should use -c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular +c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular c linear systems should be solved with L and L` rather c than computing inverses. After convergence, an approximate c eigenvector z of the original problem is recovered by solving @@ -287,11 +287,11 @@ c of NCV relative to NEV. The only formal requirement is that NCV > NEV + 1. c However, it is recommended that NCV .ge. 2*NEV. If many problems of c the same type are to be solved, one should experiment with increasing -c NCV while keeping NEV fixed for a given test problem. This will +c NCV while keeping NEV fixed for a given test problem. This will c usually decrease the required number of OP*x operations but it c also increases the work and storage required to maintain the orthogonal c basis vectors. The optimal "cross-over" with respect to CPU time -c is problem dependent and must be determined empirically. +c is problem dependent and must be determined empirically. c See Chapter 8 of Reference 2 for further information. c c 5. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the @@ -305,11 +305,11 @@ c c----------------------------------------------------------------------- c -c\Data Distribution Note: +c\Data Distribution Note: c c Fortran-D syntax: c ================ -c Complex resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) +c Complex resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) c decompose d1(n), d2(n,ncv) c align resid(i) with d1(i) c align v(i,j) with d2(i,j) @@ -321,13 +321,13 @@ c c Cray MPP syntax: c =============== -c Complex resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) +c Complex resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) c shared resid(block), v(block,:), workd(block,:) c replicated workl(lworkl) -c +c c CM2/CM5 syntax: c ============== -c +c c----------------------------------------------------------------------- c c include 'ex-nonsym.doc' @@ -343,11 +343,11 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett & Y. Saad, "_Complex_ Shift and Invert Strategies for -c Real Matrices", Linear Algebra and its Applications, vol 88/89, +c _Real_ Matrices", Linear Algebra and its Applications, vol 88/89, c pp 575-595, (1987). c c\Routines called: @@ -356,19 +356,19 @@ c cstatn ARPACK routine that initializes the timing variables. c ivout ARPACK utility routine that prints integers. c cvout ARPACK utility routine that prints vectors. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c slamch LAPACK routine that determines machine constants. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas -c +c Applied Mathematics +c Rice University +c Houston, Texas +c c\SCCS Information: @(#) -c FILE: naupd.F SID: 2.9 DATE OF SID: 07/21/02 RELEASE: 2 +c FILE: naupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 c c\Remarks c @@ -377,15 +377,15 @@ c----------------------------------------------------------------------- c subroutine cnaupd - & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, + & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, & ipntr, workd, workl, lworkl, rwork, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -393,7 +393,7 @@ subroutine cnaupd c character bmat*1, which*2 integer ido, info, ldv, lworkl, n, ncv, nev - Real + Real & tol c c %-----------------% @@ -403,7 +403,7 @@ subroutine cnaupd integer iparam(11), ipntr(14) Complex & resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) - Real + Real & rwork(ncv) c c %------------% @@ -412,13 +412,13 @@ subroutine cnaupd c Complex & one, zero - parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0)) + parameter (one = (1.0E+0, 0.0E+0) , zero = (0.0E+0, 0.0E+0) ) c c %---------------% c | Local Scalars | c %---------------% c - integer bounds, ierr, ih, iq, ishift, iupd, iw, + integer bounds, ierr, ih, iq, ishift, iupd, iw, & ldh, ldq, levec, mode, msglvl, mxiter, nb, & nev0, next, np, ritz, j save bounds, ih, iq, ishift, iupd, iw, @@ -429,29 +429,29 @@ subroutine cnaupd c | External Subroutines | c %----------------------% c - external cnaup2, cvout, ivout, second, cstatn + external cnaup2, cvout, ivout, arscnd, cstatn c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & slamch external slamch c c %-----------------------% c | Executable Statements | c %-----------------------% -c +c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c call cstatn - call second (t0) + call arscnd (t0) msglvl = mcaupd c c %----------------% @@ -496,7 +496,7 @@ subroutine cnaupd else if (mode .eq. 1 .and. bmat .eq. 'G') then ierr = -11 end if -c +c c %------------% c | Error Exit | c %------------% @@ -506,14 +506,14 @@ subroutine cnaupd ido = 99 go to 9000 end if -c +c c %------------------------% c | Set default parameters | c %------------------------% c if (nb .le. 0) nb = 1 - if (tol .le. 0.0E+0 ) tol = slamch('EpsMach') - if (ishift .ne. 0 .and. + if (tol .le. 0.0E+0 ) tol = slamch('EpsMach') + if (ishift .ne. 0 .and. & ishift .ne. 1 .and. & ishift .ne. 2) ishift = 1 c @@ -525,8 +525,8 @@ subroutine cnaupd c %----------------------------------------------% c np = ncv - nev - nev0 = nev -c + nev0 = nev +c c %-----------------------------% c | Zero out internal workspace | c %-----------------------------% @@ -534,7 +534,7 @@ subroutine cnaupd do 10 j = 1, 3*ncv**2 + 5*ncv workl(j) = zero 10 continue -c +c c %-------------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -572,12 +572,12 @@ subroutine cnaupd c | Carry out the Implicitly restarted Arnoldi Iteration. | c %-------------------------------------------------------% c - call cnaup2 + call cnaup2 & ( ido, bmat, n, which, nev0, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritz), - & workl(bounds), workl(iq), ldq, workl(iw), + & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritz), + & workl(bounds), workl(iq), ldq, workl(iw), & ipntr, workd, rwork, info ) -c +c c %--------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP. | @@ -585,7 +585,7 @@ subroutine cnaupd c if (ido .eq. 3) iparam(8) = np if (ido .ne. 99) go to 9000 -c +c iparam(3) = mxiter iparam(5) = np iparam(9) = nopx @@ -601,17 +601,17 @@ subroutine cnaupd if (info .eq. 2) info = 3 c if (msglvl .gt. 0) then - call ivout (logfil, 1, mxiter, ndigit, + call ivout (logfil, 1, [mxiter], ndigit, & '_naupd: Number of update iterations taken') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naupd: Number of wanted "converged" Ritz values') - call cvout (logfil, np, workl(ritz), ndigit, + call cvout (logfil, np, workl(ritz), ndigit, & '_naupd: The final Ritz values') - call cvout (logfil, np, workl(bounds), ndigit, + call cvout (logfil, np, workl(bounds), ndigit, & '_naupd: Associated Ritz estimates') end if c - call second (t1) + call arscnd (t1) tcaupd = t1 - t0 c if (msglvl .gt. 0) then @@ -627,8 +627,8 @@ subroutine cnaupd 1000 format (//, & 5x, '=============================================',/ & 5x, '= Complex implicit Arnoldi update code =',/ - & 5x, '= Version Number: ', ' 2.3', 21x, ' =',/ - & 5x, '= Version Date: ', ' 07/31/96', 16x, ' =',/ + & 5x, '= Version Number: ', ' 2.3' , 21x, ' =',/ + & 5x, '= Version Date: ', ' 07/31/96' , 16x, ' =',/ & 5x, '=============================================',/ & 5x, '= Summary of timing statistics =',/ & 5x, '=============================================',//) diff --git a/Toolbox/arpack-src/cneigh.f b/Toolbox/arpack-src/cneigh.f index 922102e33..2e2d4d726 100644 --- a/Toolbox/arpack-src/cneigh.f +++ b/Toolbox/arpack-src/cneigh.f @@ -12,7 +12,7 @@ c c\Arguments c RNORM Real scalar. (INPUT) -c Residual norm corresponding to the current upper Hessenberg +c Residual norm corresponding to the current upper Hessenberg c matrix H. c c N Integer. (INPUT) @@ -30,8 +30,8 @@ c c BOUNDS Complex array of length N. (OUTPUT) c On output, BOUNDS contains the Ritz estimates associated with -c the eigenvalues held in RITZ. This is equal to RNORM -c times the last components of the eigenvectors corresponding +c the eigenvalues held in RITZ. This is equal to RNORM +c times the last components of the eigenvectors corresponding c to the eigenvalues in RITZ. c c Q Complex N by N array. (WORKSPACE) @@ -48,7 +48,7 @@ c c RWORK Real work array of length N (WORKSPACE) c Private (replicated) array on each PE or array allocated on -c the front end. +c the front end. c c IERR Integer. (OUTPUT) c Error exit flag from clahqr or ctrevc. @@ -64,7 +64,7 @@ c c\Routines called: c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c cmout ARPACK utility routine that prints matrices c cvout ARPACK utility routine that prints vectors. c svout ARPACK utility routine that prints vectors. @@ -74,18 +74,18 @@ c claset LAPACK matrix initialization routine. c ctrevc LAPACK routine to compute the eigenvectors of a matrix c in upper triangular form -c ccopy Level 1 BLAS that copies one vector to another. +c ccopy Level 1 BLAS that copies one vector to another. c csscal Level 1 BLAS that scales a complex vector by a real number. c scnrm2 Level 1 BLAS that computes the norm of a vector. -c +c c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: neigh.F SID: 2.2 DATE OF SID: 4/20/96 RELEASE: 2 @@ -97,52 +97,52 @@ c c----------------------------------------------------------------------- c - subroutine cneigh (rnorm, n, h, ldh, ritz, bounds, + subroutine cneigh (rnorm, n, h, ldh, ritz, bounds, & q, ldq, workl, rwork, ierr) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | c %------------------% c integer ierr, n, ldh, ldq - Real + Real & rnorm c c %-----------------% c | Array Arguments | c %-----------------% c - Complex + Complex & bounds(n), h(ldh,n), q(ldq,n), ritz(n), - & workl(n*(n+3)) - Real + & workl(n*(n+3)) + Real & rwork(n) -c +c c %------------% c | Parameters | c %------------% c - Complex + Complex & one, zero Real & rone parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0), & rone = 1.0E+0) -c +c c %------------------------% c | Local Scalars & Arrays | c %------------------------% c logical select(1) integer j, msglvl - Complex + Complex & vl(1) Real & temp @@ -151,14 +151,14 @@ subroutine cneigh (rnorm, n, h, ldh, ritz, bounds, c | External Subroutines | c %----------------------% c - external clacpy, clahqr, ctrevc, ccopy, - & csscal, cmout, cvout, second + external clacpy, clahqr, ctrevc, ccopy, + & csscal, cmout, cvout, arscnd c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & scnrm2 external scnrm2 c @@ -171,19 +171,19 @@ subroutine cneigh (rnorm, n, h, ldh, ritz, bounds, c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mceigh -c +c if (msglvl .gt. 2) then - call cmout (logfil, n, n, h, ldh, ndigit, + call cmout (logfil, n, n, h, ldh, ndigit, & '_neigh: Entering upper Hessenberg matrix H ') end if -c +c c %----------------------------------------------------------% c | 1. Compute the eigenvalues, the last components of the | c | corresponding Schur vectors and the full Schur form T | c | of the current upper Hessenberg matrix H. | -c | clahqr returns the full Schur form of H | +c | clahqr returns the full Schur form of H | c | in WORKL(1:N**2), and the Schur vectors in q. | c %----------------------------------------------------------% c @@ -205,7 +205,7 @@ subroutine cneigh (rnorm, n, h, ldh, ritz, bounds, c | eigenvectors. | c %----------------------------------------------------------% c - call ctrevc ('Right', 'Back', select, n, workl, n, vl, n, q, + call ctrevc ('Right', 'Back', select, n, workl, n, vl, n, q, & ldq, n, n, workl(n*n+1), rwork, ierr) c if (ierr .ne. 0) go to 9000 @@ -244,7 +244,7 @@ subroutine cneigh (rnorm, n, h, ldh, ritz, bounds, & '_neigh: Ritz estimates for the eigenvalues of H') end if c - call second(t1) + call arscnd(t1) tceigh = tceigh + (t1 - t0) c 9000 continue diff --git a/Toolbox/arpack-src/cneupd.f b/Toolbox/arpack-src/cneupd.f index 17c051dac..29154ce37 100644 --- a/Toolbox/arpack-src/cneupd.f +++ b/Toolbox/arpack-src/cneupd.f @@ -1,48 +1,48 @@ c\BeginDoc -c -c\Name: cneupd -c -c\Description: -c This subroutine returns the converged approximations to eigenvalues -c of A*z = lambda*B*z and (optionally): -c -c (1) The corresponding approximate eigenvectors; -c -c (2) An orthonormal basis for the associated approximate -c invariant subspace; -c -c (3) Both. -c -c There is negligible additional cost to obtain eigenvectors. An orthonormal +c +c\Name: cneupd +c +c\Description: +c This subroutine returns the converged approximations to eigenvalues +c of A*z = lambda*B*z and (optionally): +c +c (1) The corresponding approximate eigenvectors; +c +c (2) An orthonormal basis for the associated approximate +c invariant subspace; +c +c (3) Both. +c +c There is negligible additional cost to obtain eigenvectors. An orthonormal c basis is always computed. There is an additional storage cost of n*nev -c if both are requested (in this case a separate array Z must be supplied). +c if both are requested (in this case a separate array Z must be supplied). c c The approximate eigenvalues and eigenvectors of A*z = lambda*B*z c are derived from approximate eigenvalues and eigenvectors of c of the linear operator OP prescribed by the MODE selection in the c call to CNAUPD. CNAUPD must be called before this routine is called. c These approximate eigenvalues and vectors are commonly called Ritz -c values and Ritz vectors respectively. They are referred to as such -c in the comments that follow. The computed orthonormal basis for the -c invariant subspace corresponding to these Ritz values is referred to as a -c Schur basis. -c +c values and Ritz vectors respectively. They are referred to as such +c in the comments that follow. The computed orthonormal basis for the +c invariant subspace corresponding to these Ritz values is referred to as a +c Schur basis. +c c The definition of OP as well as other terms and the relation of computed c Ritz values and vectors of OP with respect to the given problem -c A*z = lambda*B*z may be found in the header of CNAUPD. For a brief +c A*z = lambda*B*z may be found in the header of CNAUPD. For a brief c description, see definitions of IPARAM(7), MODE and WHICH in the c documentation of CNAUPD. c c\Usage: -c call cneupd -c ( RVEC, HOWMNY, SELECT, D, Z, LDZ, SIGMA, WORKEV, BMAT, -c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, +c call cneupd +c ( RVEC, HOWMNY, SELECT, D, Z, LDZ, SIGMA, WORKEV, BMAT, +c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, c WORKL, LWORKL, RWORK, INFO ) c c\Arguments: c RVEC LOGICAL (INPUT) c Specifies whether a basis for the invariant subspace corresponding -c to the converged Ritz value approximations for the eigenproblem +c to the converged Ritz value approximations for the eigenproblem c A*z = lambda*B*z is computed. c c RVEC = .FALSE. Compute Ritz values only. @@ -51,7 +51,7 @@ c See Remarks below. c c HOWMNY Character*1 (INPUT) -c Specifies the form of the basis for the invariant subspace +c Specifies the form of the basis for the invariant subspace c corresponding to the converged Ritz values that is to be computed. c c = 'A': Compute NEV Ritz vectors; @@ -62,34 +62,34 @@ c SELECT Logical array of dimension NCV. (INPUT) c If HOWMNY = 'S', SELECT specifies the Ritz vectors to be c computed. To select the Ritz vector corresponding to a -c Ritz value D(j), SELECT(j) must be set to .TRUE.. -c If HOWMNY = 'A' or 'P', SELECT need not be initialized +c Ritz value D(j), SELECT(j) must be set to .TRUE.. +c If HOWMNY = 'A' or 'P', SELECT need not be initialized c but it is used as internal workspace. c c D Complex array of dimension NEV+1. (OUTPUT) -c On exit, D contains the Ritz approximations +c On exit, D contains the Ritz approximations c to the eigenvalues lambda for A*z = lambda*B*z. c c Z Complex N by NEV array (OUTPUT) -c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of -c Z represents approximate eigenvectors (Ritz vectors) corresponding +c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of +c Z represents approximate eigenvectors (Ritz vectors) corresponding c to the NCONV=IPARAM(5) Ritz values for eigensystem c A*z = lambda*B*z. c c If RVEC = .FALSE. or HOWMNY = 'P', then Z is NOT REFERENCED. c -c NOTE: If if RVEC = .TRUE. and a Schur basis is not required, -c the array Z may be set equal to first NEV+1 columns of the Arnoldi -c basis array V computed by CNAUPD. In this case the Arnoldi basis +c NOTE: If if RVEC = .TRUE. and a Schur basis is not required, +c the array Z may be set equal to first NEV+1 columns of the Arnoldi +c basis array V computed by CNAUPD. In this case the Arnoldi basis c will be destroyed and overwritten with the eigenvector basis. c c LDZ Integer. (INPUT) c The leading dimension of the array Z. If Ritz vectors are -c desired, then LDZ .ge. max( 1, N ) is required. +c desired, then LDZ .ge. max( 1, N ) is required. c In any case, LDZ .ge. 1 is required. c c SIGMA Complex (INPUT) -c If IPARAM(7) = 3 then SIGMA represents the shift. +c If IPARAM(7) = 3 then SIGMA represents the shift. c Not referenced if IPARAM(7) = 1 or 2. c c WORKEV Complex work array of dimension 2*NCV. (WORKSPACE) @@ -97,12 +97,12 @@ c **** The remaining arguments MUST be the same as for the **** c **** call to CNAUPD that was just completed. **** c -c NOTE: The remaining arguments +c NOTE: The remaining arguments c -c BMAT, N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, -c WORKD, WORKL, LWORKL, RWORK, INFO +c BMAT, N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, +c WORKD, WORKL, LWORKL, RWORK, INFO c -c must be passed directly to CNEUPD following the last call +c must be passed directly to CNEUPD following the last call c to CNAUPD. These arguments MUST NOT BE MODIFIED between c the the last call to CNAUPD and the call to CNEUPD. c @@ -128,7 +128,7 @@ c WORKL(1:ncv*ncv+2*ncv) contains information obtained in c cnaupd. They are not changed by cneupd. c WORKL(ncv*ncv+2*ncv+1:3*ncv*ncv+4*ncv) holds the -c untransformed Ritz values, the untransformed error estimates of +c untransformed Ritz values, the untransformed error estimates of c the Ritz values, the upper triangular matrix for H, and the c associated matrix representation of the invariant subspace for H. c @@ -161,7 +161,7 @@ c c = -1: N must be positive. c = -2: NEV must be positive. -c = -3: NCV-NEV >= 1 and less than or equal to N. +c = -3: NCV-NEV >= 2 and less than or equal to N. c = -5: WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI' c = -6: BMAT must be one of 'I' or 'G'. c = -7: Length of private work WORKL array is not sufficient. @@ -187,18 +187,18 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B. Nour-Omid, B. N. Parlett, T. Ericsson and P. S. Jensen, c "How to Implement the Spectral Transformation", Math Comp., -c Vol. 48, No. 178, April, 1987 pp. 664-673. +c Vol. 48, No. 178, April, 1987 pp. 664-673. c c\Routines called: c ivout ARPACK utility routine that prints integers. c cmout ARPACK utility routine that prints matrices c cvout ARPACK utility routine that prints vectors. -c cgeqr2 LAPACK routine that computes the QR factorization of +c cgeqr2 LAPACK routine that computes the QR factorization of c a matrix. c clacpy LAPACK matrix copy routine. c clahqr LAPACK routine that computes the Schur form of a @@ -207,7 +207,7 @@ c ctrevc LAPACK routine to compute the eigenvectors of a matrix c in upper triangular form. c ctrsen LAPACK routine that re-orders the Schur form. -c cunm2r LAPACK routine that applies an orthogonal matrix in +c cunm2r LAPACK routine that applies an orthogonal matrix in c factored form. c slamch LAPACK routine that determines machine constants. c ctrmm Level 3 BLAS matrix times an upper triangular matrix. @@ -219,7 +219,7 @@ c c\Remarks c -c 1. Currently only HOWMNY = 'A' and 'P' are implemented. +c 1. Currently only HOWMNY = 'A' and 'P' are implemented. c c 2. Schur vectors are an orthogonal representation for the basis of c Ritz vectors. Thus, their numerical properties are often superior. @@ -227,20 +227,20 @@ c A * V(:,1:IPARAM(5)) = V(:,1:IPARAM(5)) * T, and c transpose( V(:,1:IPARAM(5)) ) * V(:,1:IPARAM(5)) = I c are approximately satisfied. -c Here T is the leading submatrix of order IPARAM(5) of the -c upper triangular matrix stored workl(ipntr(12)). +c Here T is the leading submatrix of order IPARAM(5) of the +c upper triangular matrix stored workl(ipntr(12)). c c\Authors c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University -c Chao Yang Houston, Texas -c Dept. of Computational & -c Applied Mathematics -c Rice University +c Chao Yang Houston, Texas +c Dept. of Computational & +c Applied Mathematics +c Rice University c Houston, Texas c c\SCCS Information: @(#) -c FILE: neupd.F SID: 2.8 DATE OF SID: 07/21/02 RELEASE: 2 +c FILE: neupd.F SID: 2.7 DATE OF SID: 09/20/00 RELEASE: 2 c c\EndLib c @@ -256,8 +256,8 @@ subroutine cneupd(rvec , howmny, select, d , c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -266,9 +266,9 @@ subroutine cneupd(rvec , howmny, select, d , character bmat, howmny, which*2 logical rvec integer info, ldz, ldv, lworkl, n, ncv, nev - Complex + Complex & sigma - Real + Real & tol c c %-----------------% @@ -281,7 +281,7 @@ subroutine cneupd(rvec , howmny, select, d , & rwork(ncv) Complex & d(nev) , resid(n) , v(ldv,ncv), - & z(ldz, nev), + & z(ldz, nev), & workd(3*n) , workl(lworkl), workev(2*ncv) c c %------------% @@ -301,7 +301,7 @@ subroutine cneupd(rvec , howmny, select, d , & invsub, iuptri, iwev , j , ldh , ldq , & mode , msglvl, ritz , wr , k , irz , & ibd , outncv, iq , np , numcnv, jj , - & ishift + & ishift, nconv2 Complex & rnorm, temp, vl(1) Real @@ -315,7 +315,7 @@ subroutine cneupd(rvec , howmny, select, d , external ccopy , cgeru, cgeqr2, clacpy, cmout, & cunm2r, ctrmm, cvout, ivout, & clahqr -c +c c %--------------------% c | External Functions | c %--------------------% @@ -325,13 +325,13 @@ subroutine cneupd(rvec , howmny, select, d , external scnrm2, slamch, slapy2 c Complex - & cdotc - external cdotc + & ccdotc + external ccdotc c c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -362,7 +362,7 @@ subroutine cneupd(rvec , howmny, select, d , ierr = -1 else if (nev .le. 0) then ierr = -2 - else if (ncv .le. nev .or. ncv .gt. n) then + else if (ncv .le. nev+1 .or. ncv .gt. n) then ierr = -3 else if (which .ne. 'LM' .and. & which .ne. 'SM' .and. @@ -382,12 +382,12 @@ subroutine cneupd(rvec , howmny, select, d , else if (howmny .eq. 'S' ) then ierr = -12 end if -c +c if (mode .eq. 1 .or. mode .eq. 2) then type = 'REGULR' else if (mode .eq. 3 ) then type = 'SHIFTI' - else + else ierr = -10 end if if (mode .eq. 1 .and. bmat .eq. 'G') ierr = -11 @@ -400,7 +400,7 @@ subroutine cneupd(rvec , howmny, select, d , info = ierr go to 9000 end if -c +c c %--------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, WORKEV, Q | c | etc... and the remaining workspace. | @@ -428,7 +428,7 @@ subroutine cneupd(rvec , howmny, select, d , c | subspace for H. | c | GRAND total of NCV * ( 3 * NCV + 4 ) locations. | c %-----------------------------------------------------------% -c +c ih = ipntr(5) ritz = ipntr(6) iq = ipntr(7) @@ -524,7 +524,7 @@ subroutine cneupd(rvec , howmny, select, d , & .le. tol*rtemp) then select(jj) = .true. numcnv = numcnv + 1 - if (jj .gt. nev) reord = .true. + if (jj .gt. nconv) reord = .true. endif 11 continue c @@ -536,9 +536,9 @@ subroutine cneupd(rvec , howmny, select, d , c %-----------------------------------------------------------% c if (msglvl .gt. 2) then - call ivout(logfil, 1, numcnv, ndigit, + call ivout(logfil, 1, [numcnv], ndigit, & '_neupd: Number of specified eigenvalues') - call ivout(logfil, 1, nconv, ndigit, + call ivout(logfil, 1, [nconv], ndigit, & '_neupd: Number of "converged" eigenvalues') end if c @@ -555,10 +555,10 @@ subroutine cneupd(rvec , howmny, select, d , c %-------------------------------------------------------% c call ccopy(ldh*ncv, workl(ih), 1, workl(iuptri), 1) - call claset('All', ncv, ncv , + call claset('All', ncv, ncv , & zero , one, workl(invsub), & ldq) - call clahqr(.true., .true. , ncv , + call clahqr(.true., .true. , ncv , & 1 , ncv , workl(iuptri), & ldh , workl(iheig) , 1 , & ncv , workl(invsub), ldq , @@ -577,7 +577,7 @@ subroutine cneupd(rvec , howmny, select, d , call cvout (logfil, ncv, workl(ihbds), ndigit, & '_neupd: Last row of the Schur vector matrix') if (msglvl .gt. 3) then - call cmout (logfil , ncv, ncv , + call cmout (logfil , ncv, ncv , & workl(iuptri), ldh, ndigit, & '_neupd: The upper triangular matrix ') end if @@ -592,9 +592,13 @@ subroutine cneupd(rvec , howmny, select, d , call ctrsen('None' , 'V' , select , & ncv , workl(iuptri), ldh , & workl(invsub), ldq , workl(iheig), - & nconv , conds , sep , + & nconv2 , conds , sep , & workev , ncv , ierr) c + if (nconv2 .lt. nconv) then + nconv = nconv2 + end if + if (ierr .eq. 1) then info = 1 go to 9000 @@ -621,7 +625,7 @@ subroutine cneupd(rvec , howmny, select, d , c call ccopy(ncv , workl(invsub+ncv-1), ldq, & workl(ihbds), 1) -c +c c %--------------------------------------------% c | Place the computed eigenvalues of H into D | c | if a spectral transformation was not used. | @@ -647,7 +651,7 @@ subroutine cneupd(rvec , howmny, select, d , c | * Postmultiply Z by R. | c | The N by NCONV matrix Z is now a matrix representation | c | of the approximate invariant subspace associated with | -c | the Ritz values in workl(iheig). The first NCONV | +c | the Ritz values in workl(iheig). The first NCONV | c | columns of V are now approximate Schur vectors | c | associated with the upper triangular matrix of order | c | NCONV in workl(iuptri). | @@ -670,7 +674,7 @@ subroutine cneupd(rvec , howmny, select, d , c | matrix consisting of plus or minus ones. | c %---------------------------------------------------% c - if ( real( workl(invsub+(j-1)*ldq+j-1) ) .lt. + if ( real( workl(invsub+(j-1)*ldq+j-1) ) .lt. & real(zero) ) then call cscal(nconv, -one, workl(iuptri+j-1), ldq) call cscal(nconv, -one, workl(iuptri+(j-1)*ldq), 1) @@ -726,8 +730,8 @@ subroutine cneupd(rvec , howmny, select, d , c | upper triangular, thus the length of the | c | inner product can be set to j. | c %------------------------------------------% -c - workev(j) = cdotc(j, workl(ihbds), 1, +c + workev(j) = ccdotc(j, workl(ihbds), 1, & workl(invsub+(j-1)*ldq), 1) 40 continue c @@ -746,7 +750,7 @@ subroutine cneupd(rvec , howmny, select, d , c %---------------------------------------% c | Copy Ritz estimates into workl(ihbds) | c %---------------------------------------% -c +c call ccopy(nconv, workev, 1, workl(ihbds), 1) c c %----------------------------------------------% @@ -758,7 +762,7 @@ subroutine cneupd(rvec , howmny, select, d , & 'Non-unit', n , nconv , & one , workl(invsub), ldq , & z , ldz) - end if + end if c else c @@ -781,25 +785,25 @@ subroutine cneupd(rvec , howmny, select, d , c if (type .eq. 'REGULR') then c - if (rvec) + if (rvec) & call cscal(ncv, rnorm, workl(ihbds), 1) -c +c else -c +c c %---------------------------------------% c | A spectral transformation was used. | c | * Determine the Ritz estimates of the | c | Ritz values in the original system. | c %---------------------------------------% c - if (rvec) + if (rvec) & call cscal(ncv, rnorm, workl(ihbds), 1) -c +c do 50 k=1, ncv temp = workl(iheig+k-1) workl(ihbds+k-1) = workl(ihbds+k-1) / temp / temp 50 continue -c +c end if c c %-----------------------------------------------------------% @@ -809,7 +813,7 @@ subroutine cneupd(rvec , howmny, select, d , c | NOTES: | c | *The Ritz vectors are not affected by the transformation. | c %-----------------------------------------------------------% -c +c if (type .eq. 'SHIFTI') then do 60 k=1, nconv d(k) = one / workl(iheig+k-1) + sigma @@ -864,7 +868,7 @@ subroutine cneupd(rvec , howmny, select, d , 9000 continue c return -c +c c %---------------% c | End of cneupd| c %---------------% diff --git a/Toolbox/arpack-src/cngets.f b/Toolbox/arpack-src/cngets.f index 76552f227..20626a2d5 100644 --- a/Toolbox/arpack-src/cngets.f +++ b/Toolbox/arpack-src/cngets.f @@ -2,9 +2,9 @@ c c\Name: cngets c -c\Description: +c\Description: c Given the eigenvalues of the upper Hessenberg matrix H, -c computes the NP shifts AMU that are zeros of the polynomial of +c computes the NP shifts AMU that are zeros of the polynomial of c degree NP which filters out components of the unwanted eigenvectors c corresponding to the AMU's based on some given criteria. c @@ -40,8 +40,8 @@ c On INPUT, RITZ contains the the eigenvalues of H. c On OUTPUT, RITZ are sorted so that the unwanted c eigenvalues are in the first NP locations and the wanted -c portion is in the last KEV locations. When exact shifts are -c selected, the unwanted part corresponds to the shifts to +c portion is in the last KEV locations. When exact shifts are +c selected, the unwanted part corresponds to the shifts to c be applied. Also, if ISHIFT .eq. 1, the unwanted eigenvalues c are further sorted so that the ones with largest Ritz values c are first. @@ -49,7 +49,7 @@ c BOUNDS Complex array of length KEV+NP. (INPUT/OUTPUT) c Error bounds corresponding to the ordering in RITZ. c -c +c c c\EndDoc c @@ -63,16 +63,16 @@ c\Routines called: c csortc ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c cvout ARPACK utility routine that prints vectors. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: ngets.F SID: 2.2 DATE OF SID: 4/20/96 RELEASE: 2 @@ -91,8 +91,8 @@ subroutine cngets ( ishift, which, kev, np, ritz, bounds) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -126,7 +126,7 @@ subroutine cngets ( ishift, which, kev, np, ritz, bounds) c | External Subroutines | c %----------------------% c - external cvout, csortc, second + external cvout, csortc, arscnd c c %-----------------------% c | Executable Statements | @@ -136,14 +136,14 @@ subroutine cngets ( ishift, which, kev, np, ritz, bounds) c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% -c - call second (t0) +c + call arscnd (t0) msglvl = mcgets -c +c call csortc (which, .true., kev+np, ritz, bounds) -c +c if ( ishift .eq. 1 ) then -c +c c %-------------------------------------------------------% c | Sort the unwanted Ritz values used as shifts so that | c | the ones with largest Ritz estimates are first | @@ -152,27 +152,27 @@ subroutine cngets ( ishift, which, kev, np, ritz, bounds) c | are applied in subroutine cnapps. | c | Be careful and use 'SM' since we want to sort BOUNDS! | c %-------------------------------------------------------% -c +c call csortc ( 'SM', .true., np, bounds, ritz ) c end if -c - call second (t1) +c + call arscnd (t1) tcgets = tcgets + (t1 - t0) c if (msglvl .gt. 0) then - call ivout (logfil, 1, kev, ndigit, '_ngets: KEV is') - call ivout (logfil, 1, np, ndigit, '_ngets: NP is') + call ivout (logfil, 1, [kev], ndigit, '_ngets: KEV is') + call ivout (logfil, 1, [np], ndigit, '_ngets: NP is') call cvout (logfil, kev+np, ritz, ndigit, & '_ngets: Eigenvalues of current H matrix ') - call cvout (logfil, kev+np, bounds, ndigit, + call cvout (logfil, kev+np, bounds, ndigit, & '_ngets: Ritz estimates of the current KEV+NP Ritz values') end if -c +c return -c +c c %---------------% c | End of cngets | c %---------------% -c +c end diff --git a/Toolbox/arpack-src/csortc.f b/Toolbox/arpack-src/csortc.f index 017c487f5..a02bd3ffa 100644 --- a/Toolbox/arpack-src/csortc.f +++ b/Toolbox/arpack-src/csortc.f @@ -3,9 +3,9 @@ c\Name: csortc c c\Description: -c Sorts the Complex array in X into the order +c Sorts the Complex array in X into the order c specified by WHICH and optionally applies the permutation to the -c Real array Y. +c Real array Y. c c\Usage: c call csortc @@ -15,7 +15,7 @@ c WHICH Character*2. (Input) c 'LM' -> sort X into increasing order of magnitude. c 'SM' -> sort X into decreasing order of magnitude. -c 'LR' -> sort X with real(X) in increasing algebraic order +c 'LR' -> sort X with real(X) in increasing algebraic order c 'SR' -> sort X with real(X) in decreasing algebraic order c 'LI' -> sort X with imag(X) in increasing algebraic order c 'SI' -> sort X with imag(X) in decreasing algebraic order @@ -45,9 +45,9 @@ c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c Adapted from the sort routine in LANSO. c @@ -72,7 +72,7 @@ subroutine csortc (which, apply, n, x, y) c | Array Arguments | c %-----------------% c - Complex + Complex & x(0:n-1), y(0:n-1) c c %---------------% @@ -80,9 +80,9 @@ subroutine csortc (which, apply, n, x, y) c %---------------% c integer i, igap, j - Complex + Complex & temp - Real + Real & temp1, temp2 c c %--------------------% @@ -103,7 +103,7 @@ subroutine csortc (which, apply, n, x, y) c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'LM') then c c %--------------------------------------------% @@ -163,7 +163,7 @@ subroutine csortc (which, apply, n, x, y) temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -177,7 +177,7 @@ subroutine csortc (which, apply, n, x, y) 60 continue igap = igap / 2 go to 40 -c +c else if (which .eq. 'LR') then c c %------------------------------------------------% @@ -197,7 +197,7 @@ subroutine csortc (which, apply, n, x, y) temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -211,7 +211,7 @@ subroutine csortc (which, apply, n, x, y) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'SR') then c c %------------------------------------------------% @@ -230,7 +230,7 @@ subroutine csortc (which, apply, n, x, y) temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -244,7 +244,7 @@ subroutine csortc (which, apply, n, x, y) 120 continue igap = igap / 2 go to 100 -c +c else if (which .eq. 'LI') then c c %--------------------------------------------% @@ -277,7 +277,7 @@ subroutine csortc (which, apply, n, x, y) 150 continue igap = igap / 2 go to 130 -c +c else if (which .eq. 'SI') then c c %---------------------------------------------% @@ -296,7 +296,7 @@ subroutine csortc (which, apply, n, x, y) temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -311,7 +311,7 @@ subroutine csortc (which, apply, n, x, y) igap = igap / 2 go to 160 end if -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/cstatn.f b/Toolbox/arpack-src/cstatn.f index b266a5533..02f75e0b2 100644 --- a/Toolbox/arpack-src/cstatn.f +++ b/Toolbox/arpack-src/cstatn.f @@ -13,8 +13,8 @@ subroutine cstatn c | See stat.doc for documentation | c %--------------------------------% c - include 'stat.fi' - + include 'stat.h' + c %-----------------------% c | Executable Statements | c %-----------------------% @@ -24,7 +24,7 @@ subroutine cstatn nrorth = 0 nitref = 0 nrstrt = 0 - + tcaupd = 0.0E+0 tcaup2 = 0.0E+0 tcaitr = 0.0E+0 @@ -35,13 +35,13 @@ subroutine cstatn titref = 0.0E+0 tgetv0 = 0.0E+0 trvec = 0.0E+0 - + c %----------------------------------------------------% c | User time including reverse communication overhead | c %----------------------------------------------------% tmvopx = 0.0E+0 tmvbx = 0.0E+0 - + return c c %---------------% diff --git a/Toolbox/arpack-src/cvout.f b/Toolbox/arpack-src/cvout.f index 31c22fe02..1ee9afabf 100644 --- a/Toolbox/arpack-src/cvout.f +++ b/Toolbox/arpack-src/cvout.f @@ -63,21 +63,21 @@ SUBROUTINE CVOUT( LOUT, N, CX, IDIGIT, IFMT ) DO 30 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF (K1.NE.N) THEN - WRITE( LOUT, 9998 )K1, K2, ( CX( I ), + WRITE( LOUT, 9998 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE - WRITE( LOUT, 9997 )K1, K2, ( CX( I ), - $ I = K1, K2 ) + WRITE( LOUT, 9997 )K1, K2, ( CX( I ), + $ I = K1, K2 ) END IF 30 CONTINUE ELSE IF( NDIGIT.LE.6 ) THEN DO 40 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF (K1.NE.N) THEN - WRITE( LOUT, 9988 )K1, K2, ( CX( I ), + WRITE( LOUT, 9988 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE - WRITE( LOUT, 9987 )K1, K2, ( CX( I ), + WRITE( LOUT, 9987 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 40 CONTINUE @@ -85,11 +85,11 @@ SUBROUTINE CVOUT( LOUT, N, CX, IDIGIT, IFMT ) DO 50 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF (K1.NE.N) THEN - WRITE( LOUT, 9978 )K1, K2, ( CX( I ), + WRITE( LOUT, 9978 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE - WRITE( LOUT, 9977 )K1, K2, ( CX( I ), - $ I = K1, K2 ) + WRITE( LOUT, 9977 )K1, K2, ( CX( I ), + $ I = K1, K2 ) END IF 50 CONTINUE ELSE @@ -104,47 +104,47 @@ SUBROUTINE CVOUT( LOUT, N, CX, IDIGIT, IFMT ) * ELSE IF( NDIGIT.LE.4 ) THEN - DO 70 K1 = 1, N, 4 + DO 70 K1 = 1, N, 4 K2 = MIN0( N, K1+3 ) IF ((K1+3).LE.N) THEN - WRITE( LOUT, 9958 )K1, K2, ( CX( I ), + WRITE( LOUT, 9958 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+3-N) .EQ. 1) THEN - WRITE( LOUT, 9957 )K1, K2, ( CX( I ), + WRITE( LOUT, 9957 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+3-N) .EQ. 2) THEN - WRITE( LOUT, 9956 )K1, K2, ( CX( I ), + WRITE( LOUT, 9956 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+3-N) .EQ. 1) THEN - WRITE( LOUT, 9955 )K1, K2, ( CX( I ), + WRITE( LOUT, 9955 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 70 CONTINUE ELSE IF( NDIGIT.LE.6 ) THEN - DO 80 K1 = 1, N, 3 + DO 80 K1 = 1, N, 3 K2 = MIN0( N, K1+2 ) IF ((K1+2).LE.N) THEN - WRITE( LOUT, 9948 )K1, K2, ( CX( I ), + WRITE( LOUT, 9948 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 1) THEN - WRITE( LOUT, 9947 )K1, K2, ( CX( I ), + WRITE( LOUT, 9947 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 2) THEN - WRITE( LOUT, 9946 )K1, K2, ( CX( I ), + WRITE( LOUT, 9946 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 80 CONTINUE ELSE IF( NDIGIT.LE.8 ) THEN - DO 90 K1 = 1, N, 3 + DO 90 K1 = 1, N, 3 K2 = MIN0( N, K1+2 ) IF ((K1+2).LE.N) THEN - WRITE( LOUT, 9938 )K1, K2, ( CX( I ), + WRITE( LOUT, 9938 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 1) THEN - WRITE( LOUT, 9937 )K1, K2, ( CX( I ), + WRITE( LOUT, 9937 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 2) THEN - WRITE( LOUT, 9936 )K1, K2, ( CX( I ), + WRITE( LOUT, 9936 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 90 CONTINUE @@ -152,10 +152,10 @@ SUBROUTINE CVOUT( LOUT, N, CX, IDIGIT, IFMT ) DO 100 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF ((K1+2).LE.N) THEN - WRITE( LOUT, 9928 )K1, K2, ( CX( I ), + WRITE( LOUT, 9928 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 1) THEN - WRITE( LOUT, 9927 )K1, K2, ( CX( I ), + WRITE( LOUT, 9927 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 100 CONTINUE @@ -171,12 +171,12 @@ SUBROUTINE CVOUT( LOUT, N, CX, IDIGIT, IFMT ) * DISPLAY 4 SIGNIFICANT DIGITS * 9998 FORMAT( 1X, I4, ' - ', I4, ':', 1X, - $ 1P,2('(',E10.3,',',E10.3,') ') ) + $ 1P,2('(',E10.3,',',E10.3,') ') ) 9997 FORMAT( 1X, I4, ' - ', I4, ':', 1X, $ 1P,1('(',E10.3,',',E10.3,') ') ) * * DISPLAY 6 SIGNIFICANT DIGITS -* +* 9988 FORMAT( 1X, I4, ' - ', I4, ':', 1X, $ 1P,2('(',E12.5,',',E12.5,') ') ) 9987 FORMAT( 1X, I4, ' - ', I4, ':', 1X, @@ -192,7 +192,7 @@ SUBROUTINE CVOUT( LOUT, N, CX, IDIGIT, IFMT ) * DISPLAY 13 SIGNIFICANT DIGITS * 9968 FORMAT( 1X, I4, ' - ', I4, ':', 1X, - $ 1P,1('(',E20.13,',',E20.13,') ') ) + $ 1P,1('(',E20.13,',',E20.13,') ') ) * *========================================================================= * FORMAT FOR 132 COLUMNS @@ -235,6 +235,6 @@ SUBROUTINE CVOUT( LOUT, N, CX, IDIGIT, IFMT ) $ 1P,1('(',E20.13,',',E20.13,') ') ) * * -* +* 9994 FORMAT( 1X, ' ' ) END diff --git a/Toolbox/arpack-src/debug.fi b/Toolbox/arpack-src/debug.h similarity index 66% rename from Toolbox/arpack-src/debug.fi rename to Toolbox/arpack-src/debug.h index 93bd42a4c..5eb0bb1b3 100644 --- a/Toolbox/arpack-src/debug.fi +++ b/Toolbox/arpack-src/debug.h @@ -1,9 +1,16 @@ +c +c\SCCS Information: @(#) +c FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 +c +c %---------------------------------% +c | See debug.doc for documentation | +c %---------------------------------% integer logfil, ndigit, mgetv0, & msaupd, msaup2, msaitr, mseigt, msapps, msgets, mseupd, & mnaupd, mnaup2, mnaitr, mneigh, mnapps, mngets, mneupd, & mcaupd, mcaup2, mcaitr, mceigh, mcapps, mcgets, mceupd - common /debug/ + common /debug/ & logfil, ndigit, mgetv0, & msaupd, msaup2, msaitr, mseigt, msapps, msgets, mseupd, & mnaupd, mnaup2, mnaitr, mneigh, mnapps, mngets, mneupd, - & mcaupd, mcaup2, mcaitr, mceigh, mcapps, mcgets, mceupd \ No newline at end of file + & mcaupd, mcaup2, mcaitr, mceigh, mcapps, mcgets, mceupd diff --git a/Toolbox/arpack-src/dgetv0.f b/Toolbox/arpack-src/dgetv0.f index 910634f40..8be4fa26d 100644 --- a/Toolbox/arpack-src/dgetv0.f +++ b/Toolbox/arpack-src/dgetv0.f @@ -3,13 +3,13 @@ c c\Name: dgetv0 c -c\Description: +c\Description: c Generate a random initial residual vector for the Arnoldi process. -c Force the residual vector to be in the range of the operator OP. +c Force the residual vector to be in the range of the operator OP. c c\Usage: c call dgetv0 -c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, +c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, c IPNTR, WORKD, IERR ) c c\Arguments @@ -36,7 +36,7 @@ c B = 'G' -> generalized eigenvalue problem A*x = lambda*B*x c c ITRY Integer. (INPUT) -c ITRY counts the number of times that dgetv0 is called. +c ITRY counts the number of times that dgetv0 is called. c It should be set to 1 on the initial call to dgetv0. c c INITV Logical variable. (INPUT) @@ -55,11 +55,11 @@ c if this is a "restart". c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c RESID Double precision array of length N. (INPUT/OUTPUT) -c Initial residual vector to be generated. If RESID is +c Initial residual vector to be generated. If RESID is c provided, force RESID into the range of the operator OP. c c RNORM Double precision scalar. (OUTPUT) @@ -88,17 +88,17 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dvout ARPACK utility routine for vector output. c dlarnv LAPACK routine for generating a random vector. c dgemv Level 2 BLAS routine for matrix vector multiplication. c dcopy Level 1 BLAS that copies one vector to another. -c ddot Level 1 BLAS that computes the scalar product of two vectors. +c ddot Level 1 BLAS that computes the scalar product of two vectors. c dnrm2 Level 1 BLAS that computes the norm of a vector. c c\Author @@ -106,26 +106,26 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: getv0.F SID: 2.7 DATE OF SID: 04/07/99 RELEASE: 2 c c\EndLib c c----------------------------------------------------------------------- c - subroutine dgetv0 - & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, + subroutine dgetv0 + & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, & ipntr, workd, ierr ) -c +c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -157,17 +157,17 @@ subroutine dgetv0 c | Local Scalars & Arrays | c %------------------------% c - logical first, inits, orth + logical first, orth integer idist, iseed(4), iter, msglvl, jj Double precision & rnorm0 - save first, iseed, inits, iter, msglvl, orth, rnorm0 + save first, iseed, iter, msglvl, orth, rnorm0 c c %----------------------% c | External Subroutines | c %----------------------% c - external dlarnv, dvout, dcopy, dgemv, second + external dlarnv, dvout, dcopy, dgemv, arscnd c c %--------------------% c | External Functions | @@ -183,12 +183,6 @@ subroutine dgetv0 c intrinsic abs, sqrt c -c %-----------------% -c | Data Statements | -c %-----------------% -c - data inits /.true./ -c c %-----------------------% c | Executable Statements | c %-----------------------% @@ -199,24 +193,21 @@ subroutine dgetv0 c | random number generator | c %-----------------------------------% c - if (inits) then - iseed(1) = 1 - iseed(2) = 3 - iseed(3) = 5 - iseed(4) = 7 - inits = .false. - end if + iseed(1) = 1 + iseed(2) = 3 + iseed(3) = 5 + iseed(4) = 7 c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mgetv0 -c +c ierr = 0 iter = 0 first = .FALSE. @@ -235,23 +226,25 @@ subroutine dgetv0 idist = 2 call dlarnv (idist, iseed, n, resid) end if -c +c c %----------------------------------------------------------% c | Force the starting vector into the range of OP to handle | c | the generalized problem when B is possibly (singular). | c %----------------------------------------------------------% c - call second (t2) - if (bmat .eq. 'G') then + call arscnd (t2) + if (itry .eq. 1) then nopx = nopx + 1 ipntr(1) = 1 ipntr(2) = n + 1 call dcopy (n, resid, 1, workd, 1) ido = -1 go to 9000 + else if (itry .gt. 1 .and. bmat .eq. 'G') then + call dcopy (n, resid, 1, workd(n + 1), 1) end if end if -c +c c %-----------------------------------------% c | Back from computing OP*(initial-vector) | c %-----------------------------------------% @@ -259,26 +252,26 @@ subroutine dgetv0 if (first) go to 20 c c %-----------------------------------------------% -c | Back from computing B*(orthogonalized-vector) | +c | Back from computing OP*(orthogonalized-vector) | c %-----------------------------------------------% c if (orth) go to 40 -c +c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) end if -c +c c %------------------------------------------------------% c | Starting vector is now in the range of OP; r = OP*r; | c | Compute B-norm of starting vector. | c %------------------------------------------------------% c - call second (t2) + call arscnd (t2) first = .TRUE. + if (itry .eq. 1) call dcopy (n, workd(n + 1), 1, resid, 1) if (bmat .eq. 'G') then nbx = nbx + 1 - call dcopy (n, workd(n+1), 1, resid, 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 @@ -286,14 +279,14 @@ subroutine dgetv0 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd, 1) end if -c +c 20 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c first = .FALSE. if (bmat .eq. 'G') then rnorm0 = ddot (n, resid, 1, workd, 1) @@ -308,7 +301,7 @@ subroutine dgetv0 c %---------------------------------------------% c if (j .eq. 1) go to 50 -c +c c %---------------------------------------------------------------- c | Otherwise need to B-orthogonalize the starting vector against | c | the current Arnoldi basis using Gram-Schmidt with iter. ref. | @@ -324,16 +317,16 @@ subroutine dgetv0 orth = .TRUE. 30 continue c - call dgemv ('T', n, j-1, one, v, ldv, workd, 1, + call dgemv ('T', n, j-1, one, v, ldv, workd, 1, & zero, workd(n+1), 1) - call dgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, + call dgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, & one, resid, 1) -c +c c %----------------------------------------------------------% c | Compute the B-norm of the orthogonalized starting vector | c %----------------------------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call dcopy (n, resid, 1, workd(n+1), 1) @@ -344,14 +337,14 @@ subroutine dgetv0 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd, 1) end if -c +c 40 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c if (bmat .eq. 'G') then rnorm = ddot (n, resid, 1, workd, 1) rnorm = sqrt(abs(rnorm)) @@ -364,14 +357,14 @@ subroutine dgetv0 c %--------------------------------------% c if (msglvl .gt. 2) then - call dvout (logfil, 1, rnorm0, ndigit, + call dvout (logfil, 1, [rnorm0], ndigit, & '_getv0: re-orthonalization ; rnorm0 is') - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_getv0: re-orthonalization ; rnorm is') end if c if (rnorm .gt. 0.717*rnorm0) go to 50 -c +c iter = iter + 1 if (iter .le. 5) then c @@ -393,11 +386,11 @@ subroutine dgetv0 rnorm = zero ierr = -1 end if -c +c 50 continue c if (msglvl .gt. 0) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_getv0: B-norm of initial / restarted starting vector') end if if (msglvl .gt. 3) then @@ -405,10 +398,10 @@ subroutine dgetv0 & '_getv0: initial / restarted starting vector') end if ido = 99 -c - call second (t1) +c + call arscnd (t1) tgetv0 = tgetv0 + (t1 - t0) -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/dlaqrb.f b/Toolbox/arpack-src/dlaqrb.f deleted file mode 100644 index d851b8636..000000000 --- a/Toolbox/arpack-src/dlaqrb.f +++ /dev/null @@ -1,521 +0,0 @@ -c----------------------------------------------------------------------- -c\BeginDoc -c -c\Name: dlaqrb -c -c\Description: -c Compute the eigenvalues and the Schur decomposition of an upper -c Hessenberg submatrix in rows and columns ILO to IHI. Only the -c last component of the Schur vectors are computed. -c -c This is mostly a modification of the LAPACK routine dlahqr. -c -c\Usage: -c call dlaqrb -c ( WANTT, N, ILO, IHI, H, LDH, WR, WI, Z, INFO ) -c -c\Arguments -c WANTT Logical variable. (INPUT) -c = .TRUE. : the full Schur form T is required; -c = .FALSE.: only eigenvalues are required. -c -c N Integer. (INPUT) -c The order of the matrix H. N >= 0. -c -c ILO Integer. (INPUT) -c IHI Integer. (INPUT) -c It is assumed that H is already upper quasi-triangular in -c rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless -c ILO = 1). SLAQRB works primarily with the Hessenberg -c submatrix in rows and columns ILO to IHI, but applies -c transformations to all of H if WANTT is .TRUE.. -c 1 <= ILO <= max(1,IHI); IHI <= N. -c -c H Double precision array, dimension (LDH,N). (INPUT/OUTPUT) -c On entry, the upper Hessenberg matrix H. -c On exit, if WANTT is .TRUE., H is upper quasi-triangular in -c rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in -c standard form. If WANTT is .FALSE., the contents of H are -c unspecified on exit. -c -c LDH Integer. (INPUT) -c The leading dimension of the array H. LDH >= max(1,N). -c -c WR Double precision array, dimension (N). (OUTPUT) -c WI Double precision array, dimension (N). (OUTPUT) -c The real and imaginary parts, respectively, of the computed -c eigenvalues ILO to IHI are stored in the corresponding -c elements of WR and WI. If two eigenvalues are computed as a -c complex conjugate pair, they are stored in consecutive -c elements of WR and WI, say the i-th and (i+1)th, with -c WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the -c eigenvalues are stored in the same order as on the diagonal -c of the Schur form returned in H, with WR(i) = H(i,i), and, if -c H(i:i+1,i:i+1) is a 2-by-2 diagonal block, -c WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). -c -c Z Double precision array, dimension (N). (OUTPUT) -c On exit Z contains the last components of the Schur vectors. -c -c INFO Integer. (OUPUT) -c = 0: successful exit -c > 0: SLAQRB failed to compute all the eigenvalues ILO to IHI -c in a total of 30*(IHI-ILO+1) iterations; if INFO = i, -c elements i+1:ihi of WR and WI contain those eigenvalues -c which have been successfully computed. -c -c\Remarks -c 1. None. -c -c----------------------------------------------------------------------- -c -c\BeginLib -c -c\Local variables: -c xxxxxx real -c -c\Routines called: -c dlabad LAPACK routine that computes machine constants. -c dlamch LAPACK routine that determines machine constants. -c dlanhs LAPACK routine that computes various norms of a matrix. -c dlanv2 LAPACK routine that computes the Schur factorization of -c 2 by 2 nonsymmetric matrix in standard form. -c dlarfg LAPACK Householder reflection construction routine. -c dcopy Level 1 BLAS that copies one vector to another. -c drot Level 1 BLAS that applies a rotation to a 2 by 2 matrix. - -c -c\Author -c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas -c -c\Revision history: -c xx/xx/92: Version ' 2.4' -c Modified from the LAPACK routine dlahqr so that only the -c last component of the Schur vectors are computed. -c -c\SCCS Information: @(#) -c FILE: laqrb.F SID: 2.2 DATE OF SID: 8/27/96 RELEASE: 2 -c -c\Remarks -c 1. None -c -c\EndLib -c -c----------------------------------------------------------------------- -c - subroutine dlaqrb ( wantt, n, ilo, ihi, h, ldh, wr, wi, - & z, info ) -c -c %------------------% -c | Scalar Arguments | -c %------------------% -c - logical wantt - integer ihi, ilo, info, ldh, n -c -c %-----------------% -c | Array Arguments | -c %-----------------% -c - Double precision - & h( ldh, * ), wi( * ), wr( * ), z( * ) -c -c %------------% -c | Parameters | -c %------------% -c - Double precision - & zero, one, dat1, dat2 - parameter (zero = 0.0D+0, one = 1.0D+0, dat1 = 7.5D-1, - & dat2 = -4.375D-1) -c -c %------------------------% -c | Local Scalars & Arrays | -c %------------------------% -c - integer i, i1, i2, itn, its, j, k, l, m, nh, nr - Double precision - & cs, h00, h10, h11, h12, h21, h22, h33, h33s, - & h43h34, h44, h44s, ovfl, s, smlnum, sn, sum, - & t1, t2, t3, tst1, ulp, unfl, v1, v2, v3 - Double precision - & v( 3 ), work( 1 ) -c -c %--------------------% -c | External Functions | -c %--------------------% -c - Double precision - & dlamch, dlanhs - external dlamch, dlanhs -c -c %----------------------% -c | External Subroutines | -c %----------------------% -c - external dcopy, dlabad, dlanv2, dlarfg, drot -c -c %-----------------------% -c | Executable Statements | -c %-----------------------% -c - info = 0 -c -c %--------------------------% -c | Quick return if possible | -c %--------------------------% -c - if( n.eq.0 ) - & return - if( ilo.eq.ihi ) then - wr( ilo ) = h( ilo, ilo ) - wi( ilo ) = zero - return - end if -c -c %---------------------------------------------% -c | Initialize the vector of last components of | -c | the Schur vectors for accumulation. | -c %---------------------------------------------% -c - do 5 j = 1, n-1 - z(j) = zero - 5 continue - z(n) = one -c - nh = ihi - ilo + 1 -c -c %-------------------------------------------------------------% -c | Set machine-dependent constants for the stopping criterion. | -c | If norm(H) <= sqrt(OVFL), overflow should not occur. | -c %-------------------------------------------------------------% -c - unfl = dlamch( 'safe minimum' ) - ovfl = one / unfl - call dlabad( unfl, ovfl ) - ulp = dlamch( 'precision' ) - smlnum = unfl*( nh / ulp ) -c -c %---------------------------------------------------------------% -c | I1 and I2 are the indices of the first row and last column | -c | of H to which transformations must be applied. If eigenvalues | -c | only are computed, I1 and I2 are set inside the main loop. | -c | Zero out H(J+2,J) = ZERO for J=1:N if WANTT = .TRUE. | -c | else H(J+2,J) for J=ILO:IHI-ILO-1 if WANTT = .FALSE. | -c %---------------------------------------------------------------% -c - if( wantt ) then - i1 = 1 - i2 = n - do 8 i=1,i2-2 - h(i1+i+1,i) = zero - 8 continue - else - do 9 i=1, ihi-ilo-1 - h(ilo+i+1,ilo+i-1) = zero - 9 continue - end if -c -c %---------------------------------------------------% -c | ITN is the total number of QR iterations allowed. | -c %---------------------------------------------------% -c - itn = 30*nh -c -c ------------------------------------------------------------------ -c The main loop begins here. I is the loop index and decreases from -c IHI to ILO in steps of 1 or 2. Each iteration of the loop works -c with the active submatrix in rows and columns L to I. -c Eigenvalues I+1 to IHI have already converged. Either L = ILO or -c H(L,L-1) is negligible so that the matrix splits. -c ------------------------------------------------------------------ -c - i = ihi - 10 continue - l = ilo - if( i.lt.ilo ) - & go to 150 - -c %--------------------------------------------------------------% -c | Perform QR iterations on rows and columns ILO to I until a | -c | submatrix of order 1 or 2 splits off at the bottom because a | -c | subdiagonal element has become negligible. | -c %--------------------------------------------------------------% - - do 130 its = 0, itn -c -c %----------------------------------------------% -c | Look for a single small subdiagonal element. | -c %----------------------------------------------% -c - do 20 k = i, l + 1, -1 - tst1 = abs( h( k-1, k-1 ) ) + abs( h( k, k ) ) - if( tst1.eq.zero ) - & tst1 = dlanhs( '1', i-l+1, h( l, l ), ldh, work ) - if( abs( h( k, k-1 ) ).le.max( ulp*tst1, smlnum ) ) - & go to 30 - 20 continue - 30 continue - l = k - if( l.gt.ilo ) then -c -c %------------------------% -c | H(L,L-1) is negligible | -c %------------------------% -c - h( l, l-1 ) = zero - end if -c -c %-------------------------------------------------------------% -c | Exit from loop if a submatrix of order 1 or 2 has split off | -c %-------------------------------------------------------------% -c - if( l.ge.i-1 ) - & go to 140 -c -c %---------------------------------------------------------% -c | Now the active submatrix is in rows and columns L to I. | -c | If eigenvalues only are being computed, only the active | -c | submatrix need be transformed. | -c %---------------------------------------------------------% -c - if( .not.wantt ) then - i1 = l - i2 = i - end if -c - if( its.eq.10 .or. its.eq.20 ) then -c -c %-------------------% -c | Exceptional shift | -c %-------------------% -c - s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) - h44 = dat1*s - h33 = h44 - h43h34 = dat2*s*s -c - else -c -c %-----------------------------------------% -c | Prepare to use Wilkinson's double shift | -c %-----------------------------------------% -c - h44 = h( i, i ) - h33 = h( i-1, i-1 ) - h43h34 = h( i, i-1 )*h( i-1, i ) - end if -c -c %-----------------------------------------------------% -c | Look for two consecutive small subdiagonal elements | -c %-----------------------------------------------------% -c - do 40 m = i - 2, l, -1 -c -c %---------------------------------------------------------% -c | Determine the effect of starting the double-shift QR | -c | iteration at row M, and see if this would make H(M,M-1) | -c | negligible. | -c %---------------------------------------------------------% -c - h11 = h( m, m ) - h22 = h( m+1, m+1 ) - h21 = h( m+1, m ) - h12 = h( m, m+1 ) - h44s = h44 - h11 - h33s = h33 - h11 - v1 = ( h33s*h44s-h43h34 ) / h21 + h12 - v2 = h22 - h11 - h33s - h44s - v3 = h( m+2, m+1 ) - s = abs( v1 ) + abs( v2 ) + abs( v3 ) - v1 = v1 / s - v2 = v2 / s - v3 = v3 / s - v( 1 ) = v1 - v( 2 ) = v2 - v( 3 ) = v3 - if( m.eq.l ) - & go to 50 - h00 = h( m-1, m-1 ) - h10 = h( m, m-1 ) - tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) ) - if( abs( h10 )*( abs( v2 )+abs( v3 ) ).le.ulp*tst1 ) - & go to 50 - 40 continue - 50 continue -c -c %----------------------% -c | Double-shift QR step | -c %----------------------% -c - do 120 k = m, i - 1 -c -c ------------------------------------------------------------ -c The first iteration of this loop determines a reflection G -c from the vector V and applies it from left and right to H, -c thus creating a nonzero bulge below the subdiagonal. -c -c Each subsequent iteration determines a reflection G to -c restore the Hessenberg form in the (K-1)th column, and thus -c chases the bulge one step toward the bottom of the active -c submatrix. NR is the order of G. -c ------------------------------------------------------------ -c - nr = min( 3, i-k+1 ) - if( k.gt.m ) - & call dcopy( nr, h( k, k-1 ), 1, v, 1 ) - call dlarfg( nr, v( 1 ), v( 2 ), 1, t1 ) - if( k.gt.m ) then - h( k, k-1 ) = v( 1 ) - h( k+1, k-1 ) = zero - if( k.lt.i-1 ) - & h( k+2, k-1 ) = zero - else if( m.gt.l ) then - h( k, k-1 ) = -h( k, k-1 ) - end if - v2 = v( 2 ) - t2 = t1*v2 - if( nr.eq.3 ) then - v3 = v( 3 ) - t3 = t1*v3 -c -c %------------------------------------------------% -c | Apply G from the left to transform the rows of | -c | the matrix in columns K to I2. | -c %------------------------------------------------% -c - do 60 j = k, i2 - sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) - h( k, j ) = h( k, j ) - sum*t1 - h( k+1, j ) = h( k+1, j ) - sum*t2 - h( k+2, j ) = h( k+2, j ) - sum*t3 - 60 continue -c -c %----------------------------------------------------% -c | Apply G from the right to transform the columns of | -c | the matrix in rows I1 to min(K+3,I). | -c %----------------------------------------------------% -c - do 70 j = i1, min( k+3, i ) - sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) - h( j, k ) = h( j, k ) - sum*t1 - h( j, k+1 ) = h( j, k+1 ) - sum*t2 - h( j, k+2 ) = h( j, k+2 ) - sum*t3 - 70 continue -c -c %----------------------------------% -c | Accumulate transformations for Z | -c %----------------------------------% -c - sum = z( k ) + v2*z( k+1 ) + v3*z( k+2 ) - z( k ) = z( k ) - sum*t1 - z( k+1 ) = z( k+1 ) - sum*t2 - z( k+2 ) = z( k+2 ) - sum*t3 - - else if( nr.eq.2 ) then -c -c %------------------------------------------------% -c | Apply G from the left to transform the rows of | -c | the matrix in columns K to I2. | -c %------------------------------------------------% -c - do 90 j = k, i2 - sum = h( k, j ) + v2*h( k+1, j ) - h( k, j ) = h( k, j ) - sum*t1 - h( k+1, j ) = h( k+1, j ) - sum*t2 - 90 continue -c -c %----------------------------------------------------% -c | Apply G from the right to transform the columns of | -c | the matrix in rows I1 to min(K+3,I). | -c %----------------------------------------------------% -c - do 100 j = i1, i - sum = h( j, k ) + v2*h( j, k+1 ) - h( j, k ) = h( j, k ) - sum*t1 - h( j, k+1 ) = h( j, k+1 ) - sum*t2 - 100 continue -c -c %----------------------------------% -c | Accumulate transformations for Z | -c %----------------------------------% -c - sum = z( k ) + v2*z( k+1 ) - z( k ) = z( k ) - sum*t1 - z( k+1 ) = z( k+1 ) - sum*t2 - end if - 120 continue - - 130 continue -c -c %-------------------------------------------------------% -c | Failure to converge in remaining number of iterations | -c %-------------------------------------------------------% -c - info = i - return - - 140 continue - - if( l.eq.i ) then -c -c %------------------------------------------------------% -c | H(I,I-1) is negligible: one eigenvalue has converged | -c %------------------------------------------------------% -c - wr( i ) = h( i, i ) - wi( i ) = zero - - else if( l.eq.i-1 ) then -c -c %--------------------------------------------------------% -c | H(I-1,I-2) is negligible; | -c | a pair of eigenvalues have converged. | -c | | -c | Transform the 2-by-2 submatrix to standard Schur form, | -c | and compute and store the eigenvalues. | -c %--------------------------------------------------------% -c - call dlanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ), - & h( i, i ), wr( i-1 ), wi( i-1 ), wr( i ), wi( i ), - & cs, sn ) - - if( wantt ) then -c -c %-----------------------------------------------------% -c | Apply the transformation to the rest of H and to Z, | -c | as required. | -c %-----------------------------------------------------% -c - if( i2.gt.i ) - & call drot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh, - & cs, sn ) - call drot( i-i1-1, h( i1, i-1 ), 1, h( i1, i ), 1, cs, sn ) - sum = cs*z( i-1 ) + sn*z( i ) - z( i ) = cs*z( i ) - sn*z( i-1 ) - z( i-1 ) = sum - end if - end if -c -c %---------------------------------------------------------% -c | Decrement number of remaining iterations, and return to | -c | start of the main loop with new value of I. | -c %---------------------------------------------------------% -c - itn = itn - its - i = l - 1 - go to 10 - - 150 continue - return -c -c %---------------% -c | End of dlaqrb | -c %---------------% -c - end diff --git a/Toolbox/arpack-src/dnaitr.f b/Toolbox/arpack-src/dnaitr.f index b92db4ee9..c02cd3909 100644 --- a/Toolbox/arpack-src/dnaitr.f +++ b/Toolbox/arpack-src/dnaitr.f @@ -3,8 +3,8 @@ c c\Name: dnaitr c -c\Description: -c Reverse communication interface for applying NP additional steps to +c\Description: +c Reverse communication interface for applying NP additional steps to c a K step nonsymmetric Arnoldi factorization. c c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T @@ -20,7 +20,7 @@ c c\Usage: c call dnaitr -c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, +c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, c IPNTR, WORKD, INFO ) c c\Arguments @@ -62,8 +62,8 @@ c Number of additional Arnoldi steps to take. c c NB Integer. (INPUT) -c Blocksize to be used in the recurrence. -c Only work for NB = 1 right now. The goal is to have a +c Blocksize to be used in the recurrence. +c Only work for NB = 1 right now. The goal is to have a c program that implement both the block and non-block method. c c RESID Double precision array of length N. (INPUT/OUTPUT) @@ -75,37 +75,37 @@ c B-norm of the updated residual r_{k+p} on output. c c V Double precision N by K+NP array. (INPUT/OUTPUT) -c On INPUT: V contains the Arnoldi vectors in the first K +c On INPUT: V contains the Arnoldi vectors in the first K c columns. c On OUTPUT: V contains the new NP Arnoldi vectors in the next c NP columns. The first K columns are unchanged. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Double precision (K+NP) by (K+NP) array. (INPUT/OUTPUT) c H is used to store the generated upper Hessenberg matrix. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORK for +c Pointer to mark the starting locations in the WORK for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The calling program should not +c for reverse communication. The calling program should not c use WORKD as temporary workspace during the iteration !!!!!! -c On input, WORKD(1:N) = B*RESID and is used to save some +c On input, WORKD(1:N) = B*RESID and is used to save some c computation at the first step. c c INFO Integer. (OUTPUT) @@ -125,14 +125,14 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: c dgetv0 ARPACK routine to generate the initial vector. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dmout ARPACK utility routine that prints matrices c dvout ARPACK utility routine that prints vectors. c dlabad LAPACK routine that computes machine constants. @@ -143,7 +143,7 @@ c daxpy Level 1 BLAS that computes a vector triad. c dscal Level 1 BLAS that scales a vector. c dcopy Level 1 BLAS that copies one vector to another . -c ddot Level 1 BLAS that computes the scalar product of two vectors. +c ddot Level 1 BLAS that computes the scalar product of two vectors. c dnrm2 Level 1 BLAS that computes the norm of a vector. c c\Author @@ -151,22 +151,22 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c xx/xx/92: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: naitr.F SID: 2.4 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks c The algorithm implemented is: -c +c c restart = .false. -c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; +c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; c r_{k} contains the initial residual vector even for k = 0; -c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already +c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already c computed by the calling program. c c betaj = rnorm ; p_{k+1} = B*r_{k} ; @@ -174,7 +174,7 @@ c 1) if ( betaj < tol ) stop or restart depending on j. c ( At present tol is zero ) c if ( restart ) generate a new starting vector. -c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; +c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; c p_{j} = p_{j}/betaj c 3) r_{j} = OP*v_{j} where OP is defined as in dnaupd c For shift-invert mode p_{j} = B*v_{j} is already available. @@ -189,7 +189,7 @@ c 5) Re-orthogonalization step: c s = V_{j}'*B*r_{j} c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} || -c alphaj = alphaj + s_{j}; +c alphaj = alphaj + s_{j}; c 6) Iterative refinement step: c If (rnorm1 > 0.717*rnorm) then c rnorm = rnorm1 @@ -199,7 +199,7 @@ c If this is the first time in step 6), go to 5) c Else r_{j} lies in the span of V_{j} numerically. c Set r_{j} = 0 and rnorm = 0; go to 1) -c EndIf +c EndIf c End Do c c\EndLib @@ -207,15 +207,15 @@ c----------------------------------------------------------------------- c subroutine dnaitr - & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, + & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, & ipntr, workd, info) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -250,14 +250,14 @@ subroutine dnaitr integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl, & jj Double precision - & betaj, ovfl, temp1, rnorm1, smlnum, tst1, ulp, unfl, + & betaj, ovfl, temp1, rnorm1, smlnum, tst1, ulp, unfl, & wnorm save first, orth1, orth2, rstart, step3, step4, & ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl, & betaj, rnorm1, smlnum, ulp, unfl, wnorm c c %-----------------------% -c | Local Array Arguments | +c | Local Array Arguments | c %-----------------------% c Double precision @@ -267,8 +267,8 @@ subroutine dnaitr c | External Subroutines | c %----------------------% c - external daxpy, dcopy, dscal, dgemv, dgetv0, dlabad, - & dvout, dmout, ivout, second + external daxpy, dcopy, dscal, dgemv, dgetv0, dlabad, + & dvout, dmout, ivout, arscnd c c %--------------------% c | External Functions | @@ -313,15 +313,15 @@ subroutine dnaitr end if c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mnaitr -c +c c %------------------------------% c | Initial call to this routine | c %------------------------------% @@ -337,7 +337,7 @@ subroutine dnaitr irj = ipj + n ivj = irj + n end if -c +c c %-------------------------------------------------% c | When in reverse communication mode one of: | c | STEP3, STEP4, ORTH1, ORTH2, RSTART | @@ -367,19 +367,19 @@ subroutine dnaitr c | | c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | c %--------------------------------------------------------------% - + 1000 continue c if (msglvl .gt. 1) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: generating Arnoldi vector number') - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_naitr: B-norm of the current residual is') end if -c +c c %---------------------------------------------------% c | STEP 1: Check if the B norm of j-th residual | -c | vector is zero. Equivalent to determing whether | +c | vector is zero. Equivalent to determining whether | c | an exact j-step Arnoldi factorization is present. | c %---------------------------------------------------% c @@ -393,16 +393,16 @@ subroutine dnaitr c %---------------------------------------------------% c if (msglvl .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: ****** RESTART AT STEP ******') end if -c +c c %---------------------------------------------% c | ITRY is the loop variable that controls the | c | maximum amount of times that a restart is | c | attempted. NRSTRT is used by stat.h | c %---------------------------------------------% -c +c betaj = zero nrstrt = nrstrt + 1 itry = 1 @@ -416,7 +416,7 @@ subroutine dnaitr c | RSTART = .true. flow returns here. | c %--------------------------------------% c - call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv, + call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv, & resid, rnorm, ipntr, workd, ierr) if (ido .ne. 99) go to 9000 if (ierr .lt. 0) then @@ -430,12 +430,12 @@ subroutine dnaitr c %------------------------------------------------% c info = j - 1 - call second (t1) + call arscnd (t1) tnaitr = tnaitr + (t1 - t0) ido = 99 go to 9000 end if -c +c 40 continue c c %---------------------------------------------------------% @@ -457,9 +457,9 @@ subroutine dnaitr c | use LAPACK routine SLASCL | c %-----------------------------------------% c - call dlascl ('General', i, i, rnorm, one, n, 1, + call dlascl ('General', i, i, rnorm, one, n, 1, & v(1,j), n, infol) - call dlascl ('General', i, i, rnorm, one, n, 1, + call dlascl ('General', i, i, rnorm, one, n, 1, & workd(ipj), n, infol) end if c @@ -470,29 +470,29 @@ subroutine dnaitr c step3 = .true. nopx = nopx + 1 - call second (t2) + call arscnd (t2) call dcopy (n, v(1,j), 1, workd(ivj), 1) ipntr(1) = ivj ipntr(2) = irj ipntr(3) = ipj ido = 1 -c +c c %-----------------------------------% c | Exit in order to compute OP*v_{j} | c %-----------------------------------% -c - go to 9000 +c + go to 9000 50 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | c | if step3 = .true. | c %----------------------------------% c - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) - + step3 = .false. c c %------------------------------------------% @@ -500,30 +500,30 @@ subroutine dnaitr c %------------------------------------------% c call dcopy (n, workd(irj), 1, resid, 1) -c +c c %---------------------------------------% c | STEP 4: Finish extending the Arnoldi | c | factorization to length j. | c %---------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 step4 = .true. ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-------------------------------------% c | Exit in order to compute B*OP*v_{j} | c %-------------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd(ipj), 1) end if 60 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | @@ -531,10 +531,10 @@ subroutine dnaitr c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c step4 = .false. c c %-------------------------------------% @@ -542,7 +542,7 @@ subroutine dnaitr c | Compute the B-norm of OP*v_{j}. | c %-------------------------------------% c - if (bmat .eq. 'G') then + if (bmat .eq. 'G') then wnorm = ddot (n, resid, 1, workd(ipj), 1) wnorm = sqrt(abs(wnorm)) else if (bmat .eq. 'I') then @@ -562,13 +562,13 @@ subroutine dnaitr c | Compute the j Fourier coefficients w_{j} | c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | c %------------------------------------------% -c +c call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1, & zero, h(1,j), 1) c c %--------------------------------------% c | Orthogonalize r_{j} against V_{j}. | -c | RESID contains OP*v_{j}. See STEP 3. | +c | RESID contains OP*v_{j}. See STEP 3. | c %--------------------------------------% c call dgemv ('N', n, j, -one, v, ldv, h(1,j), 1, @@ -576,51 +576,51 @@ subroutine dnaitr c if (j .gt. 1) h(j,j-1) = betaj c - call second (t4) -c + call arscnd (t4) +c orth1 = .true. c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call dcopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*r_{j} | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd(ipj), 1) - end if + end if 70 continue -c +c c %---------------------------------------------------% c | Back from reverse communication if ORTH1 = .true. | c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c orth1 = .false. c c %------------------------------% c | Compute the B-norm of r_{j}. | c %------------------------------% c - if (bmat .eq. 'G') then + if (bmat .eq. 'G') then rnorm = ddot (n, resid, 1, workd(ipj), 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then rnorm = dnrm2(n, resid, 1) end if -c +c c %-----------------------------------------------------------% c | STEP 5: Re-orthogonalization / Iterative refinement phase | c | Maximum NITER_ITREF tries. | @@ -642,20 +642,20 @@ subroutine dnaitr if (rnorm .gt. 0.717*wnorm) go to 100 iter = 0 nrorth = nrorth + 1 -c +c c %---------------------------------------------------% c | Enter the Iterative refinement phase. If further | c | refinement is necessary, loop back here. The loop | c | variable is ITER. Perform a step of Classical | c | Gram-Schmidt using all the Arnoldi vectors V_{j} | c %---------------------------------------------------% -c +c 80 continue c if (msglvl .gt. 2) then xtemp(1) = wnorm xtemp(2) = rnorm - call dvout (logfil, 2, xtemp, ndigit, + call dvout (logfil, 2, xtemp, ndigit, & '_naitr: re-orthonalization; wnorm and rnorm are') call dvout (logfil, j, h(1,j), ndigit, & '_naitr: j-th column of H') @@ -666,7 +666,7 @@ subroutine dnaitr c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | c %----------------------------------------------------% c - call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1, + call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1, & zero, workd(irj), 1) c c %---------------------------------------------% @@ -676,28 +676,28 @@ subroutine dnaitr c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | c %---------------------------------------------% c - call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1, + call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1, & one, resid, 1) call daxpy (j, one, workd(irj), 1, h(1,j), 1) -c +c orth2 = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call dcopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-----------------------------------% c | Exit in order to compute B*r_{j}. | c | r_{j} is the corrected residual. | c %-----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd(ipj), 1) - end if + end if 90 continue c c %---------------------------------------------------% @@ -705,15 +705,15 @@ subroutine dnaitr c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if c c %-----------------------------------------------------% c | Compute the B-norm of the corrected residual r_{j}. | c %-----------------------------------------------------% -c - if (bmat .eq. 'G') then +c + if (bmat .eq. 'G') then rnorm1 = ddot (n, resid, 1, workd(ipj), 1) rnorm1 = sqrt(abs(rnorm1)) else if (bmat .eq. 'I') then @@ -721,7 +721,7 @@ subroutine dnaitr end if c if (msglvl .gt. 0 .and. iter .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: Iterative refinement for Arnoldi residual') if (msglvl .gt. 2) then xtemp(1) = rnorm @@ -749,7 +749,7 @@ subroutine dnaitr c %---------------------------------------% c rnorm = rnorm1 -c +c else c c %-------------------------------------------% @@ -771,50 +771,50 @@ subroutine dnaitr 95 continue rnorm = zero end if -c +c c %----------------------------------------------% c | Branch here directly if iterative refinement | c | wasn't necessary or after at most NITER_REF | c | steps of iterative refinement. | c %----------------------------------------------% -c +c 100 continue -c +c rstart = .false. orth2 = .false. -c - call second (t5) +c + call arscnd (t5) titref = titref + (t5 - t4) -c +c c %------------------------------------% c | STEP 6: Update j = j+1; Continue | c %------------------------------------% c j = j + 1 if (j .gt. k+np) then - call second (t1) + call arscnd (t1) tnaitr = tnaitr + (t1 - t0) ido = 99 do 110 i = max(1,k), k+np-1 -c +c c %--------------------------------------------% c | Check for splitting and deflation. | c | Use a standard test as in the QR algorithm | c | REFERENCE: LAPACK subroutine dlahqr | c %--------------------------------------------% -c +c tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) ) if( tst1.eq.zero ) & tst1 = dlanhs( '1', k+np, h, ldh, workd(n+1) ) - if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) + if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 110 continue -c +c if (msglvl .gt. 2) then - call dmout (logfil, k+np, k+np, h, ldh, ndigit, + call dmout (logfil, k+np, k+np, h, ldh, ndigit, & '_naitr: Final upper Hessenberg matrix H of order K+NP') end if -c +c go to 9000 end if c @@ -823,7 +823,7 @@ subroutine dnaitr c %--------------------------------------------------------% c go to 1000 -c +c c %---------------------------------------------------------------% c | | c | E N D O F M A I N I T E R A T I O N L O O P | diff --git a/Toolbox/arpack-src/dnapps.f b/Toolbox/arpack-src/dnapps.f index 2a72e0941..1cf372569 100644 --- a/Toolbox/arpack-src/dnapps.f +++ b/Toolbox/arpack-src/dnapps.f @@ -13,14 +13,14 @@ c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q c c where Q is an orthogonal matrix which is the product of rotations -c and reflections resulting from the NP bulge chage sweeps. +c and reflections resulting from the NP bulge change sweeps. c The updated Arnoldi factorization becomes: c c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T. c c\Usage: c call dnapps -c ( N, KEV, NP, SHIFTR, SHIFTI, V, LDV, H, LDH, RESID, Q, LDQ, +c ( N, KEV, NP, SHIFTR, SHIFTI, V, LDV, H, LDH, RESID, Q, LDQ, c WORKL, WORKD ) c c\Arguments @@ -29,8 +29,8 @@ c c KEV Integer. (INPUT/OUTPUT) c KEV+NP is the size of the input matrix H. -c KEV is the size of the updated matrix HNEW. KEV is only -c updated on ouput when fewer than NP shifts are applied in +c KEV is the size of the updated matrix HNEW. KEV is only +c updated on output when fewer than NP shifts are applied in c order to keep the conjugate pair together. c c NP Integer. (INPUT) @@ -38,7 +38,7 @@ c c SHIFTR, Double precision array of length NP. (INPUT) c SHIFTI Real and imaginary part of the shifts to be applied. -c Upon, entry to dnapps, the shifts must be sorted so that the +c Upon, entry to dnapps, the shifts must be sorted so that the c conjugate pairs are in consecutive locations. c c V Double precision N by (KEV+NP) array. (INPUT/OUTPUT) @@ -51,7 +51,7 @@ c program. c c H Double precision (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT) -c On INPUT, H contains the current KEV+NP by KEV+NP upper +c On INPUT, H contains the current KEV+NP by KEV+NP upper c Hessenber matrix of the Arnoldi factorization. c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg c matrix in the KEV leading submatrix. @@ -62,7 +62,7 @@ c c RESID Double precision array of length N. (INPUT/OUTPUT) c On INPUT, RESID contains the the residual vector r_{k+p}. -c On OUTPUT, RESID is the update residual vector rnew_{k} +c On OUTPUT, RESID is the update residual vector rnew_{k} c in the first KEV locations. c c Q Double precision KEV+NP by KEV+NP work array. (WORKSPACE) @@ -97,12 +97,12 @@ c c\Routines called: c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dmout ARPACK utility routine that prints matrices. c dvout ARPACK utility routine that prints vectors. c dlabad LAPACK routine that computes machine constants. c dlacpy LAPACK matrix copy routine. -c dlamch LAPACK routine that determines machine constants. +c dlamch LAPACK routine that determines machine constants. c dlanhs LAPACK routine that computes various norms of a matrix. c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c dlarf LAPACK routine that applies Householder reflection to @@ -120,13 +120,13 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: napps.F SID: 2.4 DATE OF SID: 3/28/97 RELEASE: 2 c c\Remarks @@ -141,15 +141,15 @@ c----------------------------------------------------------------------- c subroutine dnapps - & ( n, kev, np, shiftr, shifti, v, ldv, h, ldh, resid, q, ldq, + & ( n, kev, np, shiftr, shifti, v, ldv, h, ldh, resid, q, ldq, & workl, workd ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -162,7 +162,7 @@ subroutine dnapps c %-----------------% c Double precision - & h(ldh,kev+np), resid(n), shifti(np), shiftr(np), + & h(ldh,kev+np), resid(n), shifti(np), shiftr(np), & v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np) c c %------------% @@ -180,16 +180,16 @@ subroutine dnapps integer i, iend, ir, istart, j, jj, kplusp, msglvl, nr logical cconj, first Double precision - & c, f, g, h11, h12, h21, h22, h32, ovfl, r, s, sigmai, + & c, f, g, h11, h12, h21, h22, h32, ovfl, r, s, sigmai, & sigmar, smlnum, ulp, unfl, u(3), t, tau, tst1 - save first, ovfl, smlnum, ulp, unfl + save first, ovfl, smlnum, ulp, unfl c c %----------------------% c | External Subroutines | c %----------------------% c external daxpy, dcopy, dscal, dlacpy, dlarfg, dlarf, - & dlaset, dlabad, second, dlartg + & dlaset, dlabad, arscnd, dlartg c c %--------------------% c | External Functions | @@ -206,7 +206,7 @@ subroutine dnapps intrinsic abs, max, min c c %----------------% -c | Data statments | +c | Data statements | c %----------------% c data first / .true. / @@ -237,10 +237,10 @@ subroutine dnapps c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mnapps - kplusp = kev + np -c + kplusp = kev + np +c c %--------------------------------------------% c | Initialize Q to the identity to accumulate | c | the rotations and reflections | @@ -266,11 +266,11 @@ subroutine dnapps sigmai = shifti(jj) c if (msglvl .gt. 2 ) then - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: shift number.') - call dvout (logfil, 1, sigmar, ndigit, + call dvout (logfil, 1, [sigmar], ndigit, & '_napps: The real part of the shift ') - call dvout (logfil, 1, sigmai, ndigit, + call dvout (logfil, 1, [sigmai], ndigit, & '_napps: The imaginary part of the shift ') end if c @@ -335,11 +335,11 @@ subroutine dnapps & tst1 = dlanhs( '1', kplusp-jj+1, h, ldh, workl ) if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_napps: matrix splitting at row/column no.') - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: matrix splitting with shift number.') - call dvout (logfil, 1, h(i+1,i), ndigit, + call dvout (logfil, 1, h(i+1,i), ndigit, & '_napps: off diagonal element.') end if iend = i @@ -351,9 +351,9 @@ subroutine dnapps 40 continue c if (msglvl .gt. 2) then - call ivout (logfil, 1, istart, ndigit, + call ivout (logfil, 1, [istart], ndigit, & '_napps: Start of current block ') - call ivout (logfil, 1, iend, ndigit, + call ivout (logfil, 1, [iend], ndigit, & '_napps: End of current block ') end if c @@ -368,7 +368,7 @@ subroutine dnapps c | complex conjugate pair of shifts on a 2 by 2 matrix. | c %------------------------------------------------------% c - if ( istart + 1 .eq. iend .and. abs( sigmai ) .gt. zero ) + if ( istart + 1 .eq. iend .and. abs( sigmai ) .gt. zero ) & go to 100 c h11 = h(istart,istart) @@ -381,11 +381,11 @@ subroutine dnapps c f = h11 - sigmar g = h21 -c +c do 80 i = istart, iend-1 c c %-----------------------------------------------------% -c | Contruct the plane rotation G to zero out the bulge | +c | Construct the plane rotation G to zero out the bulge | c %-----------------------------------------------------% c call dlartg (f, g, c, s, r) @@ -413,7 +413,7 @@ subroutine dnapps do 50 j = i, kplusp t = c*h(i,j) + s*h(i+1,j) h(i+1,j) = -s*h(i,j) + c*h(i+1,j) - h(i,j) = t + h(i,j) = t 50 continue c c %---------------------------------------------% @@ -423,17 +423,17 @@ subroutine dnapps do 60 j = 1, min(i+2,iend) t = c*h(j,i) + s*h(j,i+1) h(j,i+1) = -s*h(j,i) + c*h(j,i+1) - h(j,i) = t + h(j,i) = t 60 continue c c %----------------------------------------------------% c | Accumulate the rotation in the matrix Q; Q <- Q*G | c %----------------------------------------------------% c - do 70 j = 1, min( i+jj, kplusp ) + do 70 j = 1, min( i+jj, kplusp ) t = c*q(j,i) + s*q(j,i+1) q(j,i+1) = - s*q(j,i) + c*q(j,i+1) - q(j,i) = t + q(j,i) = t 70 continue c c %---------------------------% @@ -449,7 +449,7 @@ subroutine dnapps c %-----------------------------------% c | Finished applying the real shift. | c %-----------------------------------% -c +c else c c %----------------------------------------------------% @@ -465,9 +465,9 @@ subroutine dnapps c %---------------------------------------------------------% c s = 2.0*sigmar - t = dlapy2 ( sigmar, sigmai ) + t = dlapy2 ( sigmar, sigmai ) u(1) = ( h11 * (h11 - s) + t * t ) / h21 + h12 - u(2) = h11 + h22 - s + u(2) = h11 + h22 - s u(3) = h32 c do 90 i = istart, iend-1 @@ -507,7 +507,7 @@ subroutine dnapps c | Accumulate the reflector in the matrix Q; Q <- Q*G | c %-----------------------------------------------------% c - call dlarf ('Right', kplusp, nr, u, 1, tau, + call dlarf ('Right', kplusp, nr, u, 1, tau, & q(1,i), ldq, workl) c c %----------------------------% @@ -526,7 +526,7 @@ subroutine dnapps c | Finished applying a complex pair of shifts | c | to the current block | c %--------------------------------------------% -c +c end if c 100 continue @@ -568,7 +568,7 @@ subroutine dnapps tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) ) if( tst1.eq.zero ) & tst1 = dlanhs( '1', kev, h, ldh, workl ) - if( h( i+1,i ) .le. max( ulp*tst1, smlnum ) ) + if( h( i+1,i ) .le. max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 130 continue c @@ -581,9 +581,9 @@ subroutine dnapps c %-------------------------------------------------% c if (h(kev+1,kev) .gt. zero) - & call dgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, + & call dgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, & workd(n+1), 1) -c +c c %----------------------------------------------------------% c | Compute column 1 to kev of (V*Q) in backward order | c | taking advantage of the upper Hessenberg structure of Q. | @@ -599,15 +599,17 @@ subroutine dnapps c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). | c %-------------------------------------------------% c - call dlacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv) -c + do 150 i = 1, kev + call dcopy(n, v(1,kplusp-kev+i), 1, v(1,i), 1) + 150 continue +c c %--------------------------------------------------------------% c | Copy the (kev+1)-st column of (V*Q) in the appropriate place | c %--------------------------------------------------------------% c if (h(kev+1,kev) .gt. zero) & call dcopy (n, workd(n+1), 1, v(1,kev+1), 1) -c +c c %-------------------------------------% c | Update the residual vector: | c | r <- sigmak*r + betak*v(:,kev+1) | @@ -625,7 +627,7 @@ subroutine dnapps & '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}') call dvout (logfil, 1, h(kev+1,kev), ndigit, & '_napps: betak = e_{kev+1}^T*H*e_{kev}') - call ivout (logfil, 1, kev, ndigit, + call ivout (logfil, 1, [kev], ndigit, & '_napps: Order of the final Hessenberg matrix ') if (msglvl .gt. 2) then call dmout (logfil, kev, kev, h, ldh, ndigit, @@ -633,11 +635,11 @@ subroutine dnapps end if c end if -c +c 9000 continue - call second (t1) + call arscnd (t1) tnapps = tnapps + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/dnaup2.f b/Toolbox/arpack-src/dnaup2.f index 7afe60f8a..86375a646 100644 --- a/Toolbox/arpack-src/dnaup2.f +++ b/Toolbox/arpack-src/dnaup2.f @@ -2,67 +2,67 @@ c c\Name: dnaup2 c -c\Description: -c Intermediate level interface called by dnaupd. +c\Description: +c Intermediate level interface called by dnaupd . c c\Usage: c call dnaup2 c ( IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD, -c ISHIFT, MXITER, V, LDV, H, LDH, RITZR, RITZI, BOUNDS, +c ISHIFT, MXITER, V, LDV, H, LDH, RITZR, RITZI, BOUNDS, c Q, LDQ, WORKL, IPNTR, WORKD, INFO ) c c\Arguments c -c IDO, BMAT, N, WHICH, NEV, TOL, RESID: same as defined in dnaupd. -c MODE, ISHIFT, MXITER: see the definition of IPARAM in dnaupd. +c IDO, BMAT, N, WHICH, NEV, TOL, RESID: same as defined in dnaupd . +c MODE, ISHIFT, MXITER: see the definition of IPARAM in dnaupd . c c NP Integer. (INPUT/OUTPUT) -c Contains the number of implicit shifts to apply during -c each Arnoldi iteration. -c If ISHIFT=1, NP is adjusted dynamically at each iteration +c Contains the number of implicit shifts to apply during +c each Arnoldi iteration. +c If ISHIFT=1, NP is adjusted dynamically at each iteration c to accelerate convergence and prevent stagnation. -c This is also roughly equal to the number of matrix-vector +c This is also roughly equal to the number of matrix-vector c products (involving the operator OP) per Arnoldi iteration. c The logic for adjusting is contained within the current c subroutine. c If ISHIFT=0, NP is the number of shifts the user needs -c to provide via reverse comunication. 0 < NP < NCV-NEV. +c to provide via reverse communication. 0 < NP < NCV-NEV. c NP may be less than NCV-NEV for two reasons. The first, is -c to keep complex conjugate pairs of "wanted" Ritz values +c to keep complex conjugate pairs of "wanted" Ritz values c together. The second, is that a leading block of the current c upper Hessenberg matrix has split off and contains "unwanted" c Ritz values. -c Upon termination of the IRA iteration, NP contains the number +c Upon termination of the IRA iteration, NP contains the number c of "converged" wanted Ritz values. c c IUPD Integer. (INPUT) c IUPD .EQ. 0: use explicit restart instead implicit update. c IUPD .NE. 0: use implicit update. c -c V Double precision N by (NEV+NP) array. (INPUT/OUTPUT) -c The Arnoldi basis vectors are returned in the first NEV +c V Double precision N by (NEV+NP) array. (INPUT/OUTPUT) +c The Arnoldi basis vectors are returned in the first NEV c columns of V. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c -c H Double precision (NEV+NP) by (NEV+NP) array. (OUTPUT) +c H Double precision (NEV+NP) by (NEV+NP) array. (OUTPUT) c H is used to store the generated upper Hessenberg matrix c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c -c RITZR, Double precision arrays of length NEV+NP. (OUTPUT) +c RITZR, Double precision arrays of length NEV+NP. (OUTPUT) c RITZI RITZR(1:NEV) (resp. RITZI(1:NEV)) contains the real (resp. c imaginary) part of the computed Ritz values of OP. c -c BOUNDS Double precision array of length NEV+NP. (OUTPUT) -c BOUNDS(1:NEV) contain the error bounds corresponding to +c BOUNDS Double precision array of length NEV+NP. (OUTPUT) +c BOUNDS(1:NEV) contain the error bounds corresponding to c the computed Ritz values. -c -c Q Double precision (NEV+NP) by (NEV+NP) array. (WORKSPACE) +c +c Q Double precision (NEV+NP) by (NEV+NP) array. (WORKSPACE) c Private (replicated) work array used to accumulate the c rotation in the shift application step. c @@ -70,7 +70,7 @@ c Leading dimension of Q exactly as declared in the calling c program. c -c WORKL Double precision work array of length at least +c WORKL Double precision work array of length at least c (NEV+NP)**2 + 3*(NEV+NP). (INPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. It is used in shifts calculation, shifts @@ -79,23 +79,23 @@ c On exit, the last 3*(NEV+NP) locations of WORKL contain c the Ritz values (real,imaginary) and associated Ritz c estimates of the current Hessenberg matrix. They are -c listed in the same order as returned from dneigh. +c listed in the same order as returned from dneigh . c c If ISHIFT .EQ. O and IDO .EQ. 3, the first 2*NP locations -c of WORKL are used in reverse communication to hold the user +c of WORKL are used in reverse communication to hold the user c supplied shifts. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORKD for +c Pointer to mark the starting locations in the WORKD for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c -c WORKD Double precision work array of length 3*N. (WORKSPACE) +c +c WORKD Double precision work array of length 3*N. (WORKSPACE) c Distributed array to be used in the basic Arnoldi iteration c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration !!!!!!!!!! @@ -108,7 +108,7 @@ c Error flag on output. c = 0: Normal return. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. +c All possible eigenvalues of OP has been found. c NP returns the number of converged Ritz values. c = 2: No shifts could be applied. c = -8: Error return from LAPACK eigenvalue calculation; @@ -130,38 +130,38 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c dgetv0 ARPACK initial vector generation routine. -c dnaitr ARPACK Arnoldi factorization routine. -c dnapps ARPACK application of implicit shifts routine. -c dnconv ARPACK convergence of Ritz values routine. -c dneigh ARPACK compute Ritz values and error bounds routine. -c dngets ARPACK reorder Ritz values and error bounds routine. -c dsortc ARPACK sorting routine. +c dgetv0 ARPACK initial vector generation routine. +c dnaitr ARPACK Arnoldi factorization routine. +c dnapps ARPACK application of implicit shifts routine. +c dnconv ARPACK convergence of Ritz values routine. +c dneigh ARPACK compute Ritz values and error bounds routine. +c dngets ARPACK reorder Ritz values and error bounds routine. +c dsortc ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. -c dmout ARPACK utility routine that prints matrices -c dvout ARPACK utility routine that prints vectors. -c dlamch LAPACK routine that determines machine constants. -c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. -c dcopy Level 1 BLAS that copies one vector to another . -c ddot Level 1 BLAS that computes the scalar product of two vectors. -c dnrm2 Level 1 BLAS that computes the norm of a vector. -c dswap Level 1 BLAS that swaps two vectors. +c arscnd ARPACK utility routine for timing. +c dmout ARPACK utility routine that prints matrices +c dvout ARPACK utility routine that prints vectors. +c dlamch LAPACK routine that determines machine constants. +c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. +c dcopy Level 1 BLAS that copies one vector to another . +c ddot Level 1 BLAS that computes the scalar product of two vectors. +c dnrm2 Level 1 BLAS that computes the norm of a vector. +c dswap Level 1 BLAS that swaps two vectors. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c -c\SCCS Information: @(#) +c Rice University +c Houston, Texas +c +c\SCCS Information: @(#) c FILE: naup2.F SID: 2.8 DATE OF SID: 10/17/00 RELEASE: 2 c c\Remarks @@ -172,16 +172,16 @@ c----------------------------------------------------------------------- c subroutine dnaup2 - & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, h, ldh, ritzr, ritzi, bounds, + & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, + & ishift, mxiter, v, ldv, h, ldh, ritzr, ritzi, bounds, & q, ldq, workl, ipntr, workd, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -200,7 +200,7 @@ subroutine dnaup2 integer ipntr(13) Double precision & bounds(nev+np), h(ldh,nev+np), q(ldq,nev+np), resid(n), - & ritzi(nev+np), ritzr(nev+np), v(ldv,nev+np), + & ritzi(nev+np), ritzr(nev+np), v(ldv,nev+np), & workd(3*n), workl( (nev+np)*(nev+np+3) ) c c %------------% @@ -209,7 +209,7 @@ subroutine dnaup2 c Double precision & one, zero - parameter (one = 1.0D+0, zero = 0.0D+0) + parameter (one = 1.0D+0 , zero = 0.0D+0 ) c c %---------------% c | Local Scalars | @@ -217,12 +217,12 @@ subroutine dnaup2 c character wprime*2 logical cnorm , getv0, initv, update, ushift - integer ierr , iter , j , kplusp, msglvl, nconv, + integer ierr , iter , j , kplusp, msglvl, nconv, & nevbef, nev0 , np0 , nptemp, numcnv Double precision & rnorm , temp , eps23 save cnorm , getv0, initv, update, ushift, - & rnorm , iter , eps23, kplusp, msglvl, nconv , + & rnorm , iter , eps23, kplusp, msglvl, nconv , & nevbef, nev0 , np0 , numcnv c c %-----------------------% @@ -235,16 +235,16 @@ subroutine dnaup2 c | External Subroutines | c %----------------------% c - external dcopy , dgetv0, dnaitr, dnconv, dneigh, - & dngets, dnapps, dvout , ivout , second + external dcopy , dgetv0 , dnaitr , dnconv , dneigh , + & dngets , dnapps , dvout , ivout , arscnd c c %--------------------% c | External Functions | c %--------------------% c Double precision - & ddot, dnrm2, dlapy2, dlamch - external ddot, dnrm2, dlapy2, dlamch + & ddot , dnrm2 , dlapy2 , dlamch + external ddot , dnrm2 , dlapy2 , dlamch c c %---------------------% c | Intrinsic Functions | @@ -257,17 +257,17 @@ subroutine dnaup2 c %-----------------------% c if (ido .eq. 0) then -c - call second (t0) -c +c + call arscnd (t0) +c msglvl = mnaup2 -c +c c %-------------------------------------% c | Get the machine dependent constant. | c %-------------------------------------% c - eps23 = dlamch('Epsilon-Machine') - eps23 = eps23**(2.0D+0 / 3.0D+0) + eps23 = dlamch ('Epsilon-Machine') + eps23 = eps23**(2.0D+0 / 3.0D+0 ) c nev0 = nev np0 = np @@ -284,7 +284,7 @@ subroutine dnaup2 kplusp = nev + np nconv = 0 iter = 0 -c +c c %---------------------------------------% c | Set flags for computing the first NEV | c | steps of the Arnoldi factorization. | @@ -307,7 +307,7 @@ subroutine dnaup2 initv = .false. end if end if -c +c c %---------------------------------------------% c | Get a possibly random starting vector and | c | force it into the range of the operator OP. | @@ -316,7 +316,7 @@ subroutine dnaup2 10 continue c if (getv0) then - call dgetv0 (ido, bmat, 1, initv, n, 1, v, ldv, resid, rnorm, + call dgetv0 (ido, bmat, 1, initv, n, 1, v, ldv, resid, rnorm, & ipntr, workd, info) c if (ido .ne. 99) go to 9000 @@ -324,7 +324,7 @@ subroutine dnaup2 if (rnorm .eq. zero) then c c %-----------------------------------------% -c | The initial vector is zero. Error exit. | +c | The initial vector is zero. Error exit. | c %-----------------------------------------% c info = -9 @@ -333,7 +333,7 @@ subroutine dnaup2 getv0 = .false. ido = 0 end if -c +c c %-----------------------------------% c | Back from reverse communication : | c | continue with update step | @@ -353,14 +353,14 @@ subroutine dnaup2 c %-------------------------------------% c if (cnorm) go to 100 -c +c c %----------------------------------------------------------% c | Compute the first NEV steps of the Arnoldi factorization | c %----------------------------------------------------------% c - call dnaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, + call dnaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, & h, ldh, ipntr, workd, info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -374,7 +374,7 @@ subroutine dnaup2 info = -9999 go to 1200 end if -c +c c %--------------------------------------------------------------% c | | c | M A I N ARNOLDI I T E R A T I O N L O O P | @@ -382,28 +382,28 @@ subroutine dnaup2 c | factorization in place. | c | | c %--------------------------------------------------------------% -c +c 1000 continue c iter = iter + 1 c if (msglvl .gt. 0) then - call ivout (logfil, 1, iter, ndigit, + call ivout (logfil, 1, [iter], ndigit, & '_naup2: **** Start of major iteration number ****') end if -c +c c %-----------------------------------------------------------% c | Compute NP additional steps of the Arnoldi factorization. | c | Adjust NP since NEV might have been updated by last call | -c | to the shift application routine dnapps. | +c | to the shift application routine dnapps . | c %-----------------------------------------------------------% c np = kplusp - nev c if (msglvl .gt. 1) then - call ivout (logfil, 1, nev, ndigit, + call ivout (logfil, 1, [nev], ndigit, & '_naup2: The length of the current Arnoldi factorization') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naup2: Extend the Arnoldi factorization by') end if c @@ -415,10 +415,10 @@ subroutine dnaup2 20 continue update = .true. c - call dnaitr (ido , bmat, n , nev, np , mode , resid, + call dnaitr (ido , bmat, n , nev, np , mode , resid, & rnorm, v , ldv, h , ldh, ipntr, workd, & info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -435,16 +435,16 @@ subroutine dnaup2 update = .false. c if (msglvl .gt. 1) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_naup2: Corresponding B-norm of the residual') end if -c +c c %--------------------------------------------------------% c | Compute the eigenvalues and corresponding error bounds | c | of the current upper Hessenberg matrix. | c %--------------------------------------------------------% c - call dneigh (rnorm, kplusp, h, ldh, ritzr, ritzi, bounds, + call dneigh (rnorm, kplusp, h, ldh, ritzr, ritzi, bounds, & q, ldq, workl, ierr) c if (ierr .ne. 0) then @@ -454,12 +454,12 @@ subroutine dnaup2 c c %----------------------------------------------------% c | Make a copy of eigenvalues and corresponding error | -c | bounds obtained from dneigh. | +c | bounds obtained from dneigh . | c %----------------------------------------------------% c - call dcopy(kplusp, ritzr, 1, workl(kplusp**2+1), 1) - call dcopy(kplusp, ritzi, 1, workl(kplusp**2+kplusp+1), 1) - call dcopy(kplusp, bounds, 1, workl(kplusp**2+2*kplusp+1), 1) + call dcopy (kplusp, ritzr, 1, workl(kplusp**2+1), 1) + call dcopy (kplusp, ritzi, 1, workl(kplusp**2+kplusp+1), 1) + call dcopy (kplusp, bounds, 1, workl(kplusp**2+2*kplusp+1), 1) c c %---------------------------------------------------% c | Select the wanted Ritz values and their bounds | @@ -470,37 +470,37 @@ subroutine dnaup2 c | and NP may be updated if the NEV-th wanted Ritz | c | value has a non zero imaginary part. In this case | c | NEV is increased by one and NP decreased by one. | -c | NOTE: The last two arguments of dngets are no | +c | NOTE: The last two arguments of dngets are no | c | longer used as of version 2.1. | c %---------------------------------------------------% c nev = nev0 np = np0 numcnv = nev - call dngets (ishift, which, nev, np, ritzr, ritzi, + call dngets (ishift, which, nev, np, ritzr, ritzi, & bounds, workl, workl(np+1)) if (nev .eq. nev0+1) numcnv = nev0+1 -c +c c %-------------------% -c | Convergence test. | +c | Convergence test. | c %-------------------% c - call dcopy (nev, bounds(np+1), 1, workl(2*np+1), 1) - call dnconv (nev, ritzr(np+1), ritzi(np+1), workl(2*np+1), + call dcopy (nev, bounds(np+1), 1, workl(2*np+1), 1) + call dnconv (nev, ritzr(np+1), ritzi(np+1), workl(2*np+1), & tol, nconv) -c +c if (msglvl .gt. 2) then kp(1) = nev kp(2) = np kp(3) = numcnv kp(4) = nconv - call ivout (logfil, 4, kp, ndigit, + call ivout (logfil, 4, kp, ndigit, & '_naup2: NEV, NP, NUMCNV, NCONV are') - call dvout (logfil, kplusp, ritzr, ndigit, + call dvout (logfil, kplusp, ritzr, ndigit, & '_naup2: Real part of the eigenvalues of H') - call dvout (logfil, kplusp, ritzi, ndigit, + call dvout (logfil, kplusp, ritzi, ndigit, & '_naup2: Imaginary part of the eigenvalues of H') - call dvout (logfil, kplusp, bounds, ndigit, + call dvout (logfil, kplusp, bounds, ndigit, & '_naup2: Ritz estimates of the current NCV Ritz values') end if c @@ -521,22 +521,22 @@ subroutine dnaup2 nev = nev + 1 end if 30 continue -c - if ( (nconv .ge. numcnv) .or. +c + if ( (nconv .ge. numcnv) .or. & (iter .gt. mxiter) .or. & (np .eq. 0) ) then c if (msglvl .gt. 4) then - call dvout(logfil, kplusp, workl(kplusp**2+1), ndigit, + call dvout (logfil, kplusp, workl(kplusp**2+1), ndigit, & '_naup2: Real part of the eig computed by _neigh:') - call dvout(logfil, kplusp, workl(kplusp**2+kplusp+1), + call dvout (logfil, kplusp, workl(kplusp**2+kplusp+1), & ndigit, & '_naup2: Imag part of the eig computed by _neigh:') - call dvout(logfil, kplusp, workl(kplusp**2+kplusp*2+1), + call dvout (logfil, kplusp, workl(kplusp**2+kplusp*2+1), & ndigit, & '_naup2: Ritz eistmates computed by _neigh:') end if -c +c c %------------------------------------------------% c | Prepare to exit. Put the converged Ritz values | c | and corresponding bounds in RITZ(1:NCONV) and | @@ -552,10 +552,10 @@ subroutine dnaup2 h(3,1) = rnorm c c %----------------------------------------------% -c | To be consistent with dngets, we first do a | +c | To be consistent with dngets , we first do a | c | pre-processing sort in order to keep complex | c | conjugate pairs together. This is similar | -c | to the pre-processing sort used in dngets | +c | to the pre-processing sort used in dngets | c | except that the sort is done in the opposite | c | order. | c %----------------------------------------------% @@ -567,7 +567,7 @@ subroutine dnaup2 if (which .eq. 'LI') wprime = 'SM' if (which .eq. 'SI') wprime = 'LM' c - call dsortc (wprime, .true., kplusp, ritzr, ritzi, bounds) + call dsortc (wprime, .true., kplusp, ritzr, ritzi, bounds) c c %----------------------------------------------% c | Now sort Ritz values so that converged Ritz | @@ -583,7 +583,7 @@ subroutine dnaup2 if (which .eq. 'LI') wprime = 'SI' if (which .eq. 'SI') wprime = 'LI' c - call dsortc(wprime, .true., kplusp, ritzr, ritzi, bounds) + call dsortc (wprime, .true., kplusp, ritzr, ritzi, bounds) c c %--------------------------------------------------% c | Scale the Ritz estimate of each Ritz value | @@ -591,20 +591,20 @@ subroutine dnaup2 c %--------------------------------------------------% c do 35 j = 1, numcnv - temp = max(eps23,dlapy2(ritzr(j), + temp = max(eps23,dlapy2 (ritzr(j), & ritzi(j))) bounds(j) = bounds(j)/temp 35 continue c c %----------------------------------------------------% c | Sort the Ritz values according to the scaled Ritz | -c | esitmates. This will push all the converged ones | +c | estimates. This will push all the converged ones | c | towards the front of ritzr, ritzi, bounds | c | (in the case when NCONV < NEV.) | c %----------------------------------------------------% c wprime = 'LR' - call dsortc(wprime, .true., numcnv, bounds, ritzr, ritzi) + call dsortc (wprime, .true., numcnv, bounds, ritzr, ritzi) c c %----------------------------------------------% c | Scale the Ritz estimate back to its original | @@ -612,7 +612,7 @@ subroutine dnaup2 c %----------------------------------------------% c do 40 j = 1, numcnv - temp = max(eps23, dlapy2(ritzr(j), + temp = max(eps23, dlapy2 (ritzr(j), & ritzi(j))) bounds(j) = bounds(j)*temp 40 continue @@ -623,25 +623,25 @@ subroutine dnaup2 c | ritzr, ritzi and bound. | c %------------------------------------------------% c - call dsortc(which, .true., nconv, ritzr, ritzi, bounds) + call dsortc (which, .true., nconv, ritzr, ritzi, bounds) c if (msglvl .gt. 1) then - call dvout (logfil, kplusp, ritzr, ndigit, + call dvout (logfil, kplusp, ritzr, ndigit, & '_naup2: Sorted real part of the eigenvalues') - call dvout (logfil, kplusp, ritzi, ndigit, + call dvout (logfil, kplusp, ritzi, ndigit, & '_naup2: Sorted imaginary part of the eigenvalues') - call dvout (logfil, kplusp, bounds, ndigit, + call dvout (logfil, kplusp, bounds, ndigit, & '_naup2: Sorted ritz estimates.') end if c c %------------------------------------% -c | Max iterations have been exceeded. | +c | Max iterations have been exceeded. | c %------------------------------------% c if (iter .gt. mxiter .and. nconv .lt. numcnv) info = 1 c c %---------------------% -c | No shifts to apply. | +c | No shifts to apply. | c %---------------------% c if (np .eq. 0 .and. nconv .lt. numcnv) info = 2 @@ -650,7 +650,7 @@ subroutine dnaup2 go to 1100 c else if ( (nconv .lt. numcnv) .and. (ishift .eq. 1) ) then -c +c c %-------------------------------------------------% c | Do not have all the requested eigenvalues yet. | c | To prevent possible stagnation, adjust the size | @@ -664,32 +664,43 @@ subroutine dnaup2 else if (nev .eq. 1 .and. kplusp .gt. 3) then nev = 2 end if +c %---- Scipy fix ------------------------------------------------ +c | We must keep nev below this value, as otherwise we can get +c | np == 0 (note that dngets below can bump nev by 1). If np == 0, +c | the next call to `dnaitr` will write out-of-bounds. +c | + if (nev .gt. kplusp - 2) then + nev = kplusp - 2 + end if +c | +c %---- Scipy fix end -------------------------------------------- +c np = kplusp - nev -c +c c %---------------------------------------% c | If the size of NEV was just increased | c | resort the eigenvalues. | c %---------------------------------------% -c - if (nevbef .lt. nev) - & call dngets (ishift, which, nev, np, ritzr, ritzi, +c + if (nevbef .lt. nev) + & call dngets (ishift, which, nev, np, ritzr, ritzi, & bounds, workl, workl(np+1)) c - end if -c + end if +c if (msglvl .gt. 0) then - call ivout (logfil, 1, nconv, ndigit, + call ivout (logfil, 1, [nconv], ndigit, & '_naup2: no. of "converged" Ritz values at this iter.') if (msglvl .gt. 1) then kp(1) = nev kp(2) = np - call ivout (logfil, 2, kp, ndigit, + call ivout (logfil, 2, kp, ndigit, & '_naup2: NEV and NP are') - call dvout (logfil, nev, ritzr(np+1), ndigit, + call dvout (logfil, nev, ritzr(np+1), ndigit, & '_naup2: "wanted" Ritz values -- real part') - call dvout (logfil, nev, ritzi(np+1), ndigit, + call dvout (logfil, nev, ritzi(np+1), ndigit, & '_naup2: "wanted" Ritz values -- imag part') - call dvout (logfil, nev, bounds(np+1), ndigit, + call dvout (logfil, nev, bounds(np+1), ndigit, & '_naup2: Ritz estimates of the "wanted" values ') end if end if @@ -697,7 +708,7 @@ subroutine dnaup2 if (ishift .eq. 0) then c c %-------------------------------------------------------% -c | User specified shifts: reverse comminucation to | +c | User specified shifts: reverse communication to | c | compute the shifts. They are returned in the first | c | 2*NP locations of WORKL. | c %-------------------------------------------------------% @@ -706,7 +717,7 @@ subroutine dnaup2 ido = 3 go to 9000 end if -c +c 50 continue c c %------------------------------------% @@ -718,26 +729,26 @@ subroutine dnaup2 ushift = .false. c if ( ishift .eq. 0 ) then -c +c c %----------------------------------% c | Move the NP shifts from WORKL to | c | RITZR, RITZI to free up WORKL | c | for non-exact shift case. | c %----------------------------------% c - call dcopy (np, workl, 1, ritzr, 1) - call dcopy (np, workl(np+1), 1, ritzi, 1) + call dcopy (np, workl, 1, ritzr, 1) + call dcopy (np, workl(np+1), 1, ritzi, 1) end if c - if (msglvl .gt. 2) then - call ivout (logfil, 1, np, ndigit, + if (msglvl .gt. 2) then + call ivout (logfil, 1, [np], ndigit, & '_naup2: The number of shifts to apply ') - call dvout (logfil, np, ritzr, ndigit, + call dvout (logfil, np, ritzr, ndigit, & '_naup2: Real part of the shifts') - call dvout (logfil, np, ritzi, ndigit, + call dvout (logfil, np, ritzi, ndigit, & '_naup2: Imaginary part of the shifts') - if ( ishift .eq. 1 ) - & call dvout (logfil, np, bounds, ndigit, + if ( ishift .eq. 1 ) + & call dvout (logfil, np, bounds, ndigit, & '_naup2: Ritz estimates of the shifts') end if c @@ -748,60 +759,60 @@ subroutine dnaup2 c | The first 2*N locations of WORKD are used as workspace. | c %---------------------------------------------------------% c - call dnapps (n, nev, np, ritzr, ritzi, v, ldv, + call dnapps (n, nev, np, ritzr, ritzi, v, ldv, & h, ldh, resid, q, ldq, workl, workd) c c %---------------------------------------------% c | Compute the B-norm of the updated residual. | c | Keep B*RESID in WORKD(1:N) to be used in | -c | the first step of the next call to dnaitr. | +c | the first step of the next call to dnaitr . | c %---------------------------------------------% c cnorm = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 - call dcopy (n, resid, 1, workd(n+1), 1) + call dcopy (n, resid, 1, workd(n+1), 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*RESID | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then - call dcopy (n, resid, 1, workd, 1) + call dcopy (n, resid, 1, workd, 1) end if -c +c 100 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(1:N) := B*RESID | c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c - if (bmat .eq. 'G') then - rnorm = ddot (n, resid, 1, workd, 1) +c + if (bmat .eq. 'G') then + rnorm = ddot (n, resid, 1, workd, 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then - rnorm = dnrm2(n, resid, 1) + rnorm = dnrm2 (n, resid, 1) end if cnorm = .false. c if (msglvl .gt. 2) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_naup2: B-norm of residual for compressed factorization') - call dmout (logfil, nev, nev, h, ldh, ndigit, + call dmout (logfil, nev, nev, h, ldh, ndigit, & '_naup2: Compressed upper Hessenberg matrix H') end if -c +c go to 1000 c c %---------------------------------------------------------------% @@ -814,7 +825,7 @@ subroutine dnaup2 c mxiter = iter nev = numcnv -c +c 1200 continue ido = 99 c @@ -822,13 +833,13 @@ subroutine dnaup2 c | Error Exit | c %------------% c - call second (t1) + call arscnd (t1) tnaup2 = t1 - t0 -c +c 9000 continue c c %---------------% -c | End of dnaup2 | +c | End of dnaup2 | c %---------------% c return diff --git a/Toolbox/arpack-src/dnaupd.f b/Toolbox/arpack-src/dnaupd.f index 7fe403d6b..0b4cbb0d8 100644 --- a/Toolbox/arpack-src/dnaupd.f +++ b/Toolbox/arpack-src/dnaupd.f @@ -2,19 +2,19 @@ c c\Name: dnaupd c -c\Description: +c\Description: c Reverse communication interface for the Implicitly Restarted Arnoldi -c iteration. This subroutine computes approximations to a few eigenpairs -c of a linear operator "OP" with respect to a semi-inner product defined by -c a symmetric positive semi-definite real matrix B. B may be the identity -c matrix. NOTE: If the linear operator "OP" is real and symmetric -c with respect to the real positive semi-definite symmetric matrix B, -c i.e. B*OP = (OP`)*B, then subroutine dsaupd should be used instead. +c iteration. This subroutine computes approximations to a few eigenpairs +c of a linear operator "OP" with respect to a semi-inner product defined by +c a symmetric positive semi-definite real matrix B. B may be the identity +c matrix. NOTE: If the linear operator "OP" is real and symmetric +c with respect to the real positive semi-definite symmetric matrix B, +c i.e. B*OP = (OP`)*B, then subroutine dsaupd should be used instead. c c The computed approximate eigenvalues are called Ritz values and c the corresponding approximate eigenvectors are called Ritz vectors. c -c dnaupd is usually called iteratively to solve one of the +c dnaupd is usually called iteratively to solve one of the c following problems: c c Mode 1: A*x = lambda*x. @@ -25,18 +25,18 @@ c ===> (If M can be factored see remark 3 below) c c Mode 3: A*x = lambda*M*x, M symmetric semi-definite -c ===> OP = Real_Part{ inv[A - sigma*M]*M } and B = M. +c ===> OP = Real_Part{ inv[A - sigma*M]*M } and B = M. c ===> shift-and-invert mode (in real arithmetic) -c If OP*x = amu*x, then +c If OP*x = amu*x, then c amu = 1/2 * [ 1/(lambda-sigma) + 1/(lambda-conjg(sigma)) ]. c Note: If sigma is real, i.e. imaginary part of sigma is zero; -c Real_Part{ inv[A - sigma*M]*M } == inv[A - sigma*M]*M -c amu == 1/(lambda-sigma). -c +c Real_Part{ inv[A - sigma*M]*M } == inv[A - sigma*M]*M +c amu == 1/(lambda-sigma). +c c Mode 4: A*x = lambda*M*x, M symmetric semi-definite -c ===> OP = Imaginary_Part{ inv[A - sigma*M]*M } and B = M. +c ===> OP = Imaginary_Part{ inv[A - sigma*M]*M } and B = M. c ===> shift-and-invert mode (in real arithmetic) -c If OP*x = amu*x, then +c If OP*x = amu*x, then c amu = 1/2i * [ 1/(lambda-sigma) - 1/(lambda-conjg(sigma)) ]. c c Both mode 3 and 4 give the same enhancement to eigenvalues close to @@ -63,12 +63,12 @@ c c\Arguments c IDO Integer. (INPUT/OUTPUT) -c Reverse communication flag. IDO must be zero on the first -c call to dnaupd. IDO will be set internally to +c Reverse communication flag. IDO must be zero on the first +c call to dnaupd . IDO will be set internally to c indicate the type of operation to be performed. Control is c then given back to the calling routine which has the c responsibility to carry out the requested operation and call -c dnaupd with the result. The operand is given in +c dnaupd with the result. The operand is given in c WORKD(IPNTR(1)), the result must be put in WORKD(IPNTR(2)). c ------------------------------------------------------------- c IDO = 0: first call to the reverse communication interface @@ -86,13 +86,13 @@ c IDO = 2: compute Y = B * X where c IPNTR(1) is the pointer into WORKD for X, c IPNTR(2) is the pointer into WORKD for Y. -c IDO = 3: compute the IPARAM(8) real and imaginary parts +c IDO = 3: compute the IPARAM(8) real and imaginary parts c of the shifts where INPTR(14) is the pointer c into WORKL for placing the shifts. See Remark c 5 below. c IDO = 99: done c ------------------------------------------------------------- -c +c c BMAT Character*1. (INPUT) c BMAT specifies the type of the matrix B that defines the c semi-inner product for the operator OP. @@ -110,18 +110,18 @@ c 'LI' -> want the NEV eigenvalues of largest imaginary part. c 'SI' -> want the NEV eigenvalues of smallest imaginary part. c -c NEV Integer. (INPUT/OUTPUT) +c NEV Integer. (INPUT) c Number of eigenvalues of OP to be computed. 0 < NEV < N-1. c -c TOL Double precision scalar. (INPUT) -c Stopping criterion: the relative accuracy of the Ritz value +c TOL Double precision scalar. (INPUT/OUTPUT) +c Stopping criterion: the relative accuracy of the Ritz value c is considered acceptable if BOUNDS(I) .LE. TOL*ABS(RITZ(I)) c where ABS(RITZ(I)) is the magnitude when RITZ(I) is complex. -c DEFAULT = DLAMCH('EPS') (machine precision as computed -c by the LAPACK auxiliary subroutine DLAMCH). +c DEFAULT = DLAMCH ('EPS') (machine precision as computed +c by the LAPACK auxiliary subroutine DLAMCH ). c -c RESID Double precision array of length N. (INPUT/OUTPUT) -c On INPUT: +c RESID Double precision array of length N. (INPUT/OUTPUT) +c On INPUT: c If INFO .EQ. 0, a random initial residual vector is used. c If INFO .NE. 0, RESID contains the initial residual vector, c possibly from a previous run. @@ -131,17 +131,17 @@ c NCV Integer. (INPUT) c Number of columns of the matrix V. NCV must satisfy the two c inequalities 2 <= NCV-NEV and NCV <= N. -c This will indicate how many Arnoldi vectors are generated -c at each iteration. After the startup phase in which NEV -c Arnoldi vectors are generated, the algorithm generates -c approximately NCV-NEV Arnoldi vectors at each subsequent update -c iteration. Most of the cost in generating each Arnoldi vector is -c in the matrix-vector operation OP*x. -c NOTE: 2 <= NCV-NEV in order that complex conjugate pairs of Ritz +c This will indicate how many Arnoldi vectors are generated +c at each iteration. After the startup phase in which NEV +c Arnoldi vectors are generated, the algorithm generates +c approximately NCV-NEV Arnoldi vectors at each subsequent update +c iteration. Most of the cost in generating each Arnoldi vector is +c in the matrix-vector operation OP*x. +c NOTE: 2 <= NCV-NEV in order that complex conjugate pairs of Ritz c values are kept together. (See remark 4 below) c -c V Double precision array N by NCV. (OUTPUT) -c Contains the final set of Arnoldi basis vectors. +c V Double precision array N by NCV. (OUTPUT) +c Contains the final set of Arnoldi basis vectors. c c LDV Integer. (INPUT) c Leading dimension of V exactly as declared in the calling program. @@ -154,11 +154,11 @@ c ISHIFT = 0: the shifts are provided by the user via c reverse communication. The real and imaginary c parts of the NCV eigenvalues of the Hessenberg -c matrix H are returned in the part of the WORKL -c array corresponding to RITZR and RITZI. See remark +c matrix H are returned in the part of the WORKL +c array corresponding to RITZR and RITZI. See remark c 5 below. c ISHIFT = 1: exact shifts with respect to the current -c Hessenberg matrix H. This is equivalent to +c Hessenberg matrix H. This is equivalent to c restarting the iteration with a starting vector c that is a linear combination of approximate Schur c vectors associated with the "wanted" Ritz values. @@ -167,8 +167,8 @@ c IPARAM(2) = No longer referenced. c c IPARAM(3) = MXITER -c On INPUT: maximum number of Arnoldi update iterations allowed. -c On OUTPUT: actual number of Arnoldi update iterations taken. +c On INPUT: maximum number of Arnoldi update iterations allowed. +c On OUTPUT: actual number of Arnoldi update iterations taken. c c IPARAM(4) = NB: blocksize to be used in the recurrence. c The code currently works only for NB = 1. @@ -178,23 +178,23 @@ c the convergence criterion. c c IPARAM(6) = IUPD -c No longer referenced. Implicit restarting is ALWAYS used. +c No longer referenced. Implicit restarting is ALWAYS used. c c IPARAM(7) = MODE c On INPUT determines what type of eigenproblem is being solved. -c Must be 1,2,3,4; See under \Description of dnaupd for the +c Must be 1,2,3,4; See under \Description of dnaupd for the c four modes available. c c IPARAM(8) = NP c When ido = 3 and the user provides shifts through reverse -c communication (IPARAM(1)=0), dnaupd returns NP, the number +c communication (IPARAM(1)=0), dnaupd returns NP, the number c of shifts the user is to provide. 0 < NP <=NCV-NEV. See Remark c 5 below. c c IPARAM(9) = NUMOP, IPARAM(10) = NUMOPB, IPARAM(11) = NUMREO, c OUTPUT: NUMOP = total number of OP*x operations, c NUMOPB = total number of B*x operations if BMAT='G', -c NUMREO = total number of steps of re-orthogonalization. +c NUMREO = total number of steps of re-orthogonalization. c c IPNTR Integer array of length 14. (OUTPUT) c Pointer to mark the starting locations in the WORKD and WORKL @@ -202,13 +202,13 @@ c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X in WORKD. c IPNTR(2): pointer to the current result vector Y in WORKD. -c IPNTR(3): pointer to the vector B * X in WORKD when used in +c IPNTR(3): pointer to the vector B * X in WORKD when used in c the shift-and-invert mode. c IPNTR(4): pointer to the next available location in WORKL c that is untouched by the program. c IPNTR(5): pointer to the NCV by NCV upper Hessenberg matrix c H in WORKL. -c IPNTR(6): pointer to the real part of the ritz value array +c IPNTR(6): pointer to the real part of the ritz value array c RITZR in WORKL. c IPNTR(7): pointer to the imaginary part of the ritz value array c RITZI in WORKL. @@ -217,30 +217,30 @@ c c IPNTR(14): pointer to the NP shifts in WORKL. See Remark 5 below. c -c Note: IPNTR(9:13) is only referenced by dneupd. See Remark 2 below. +c Note: IPNTR(9:13) is only referenced by dneupd . See Remark 2 below. c -c IPNTR(9): pointer to the real part of the NCV RITZ values of the +c IPNTR(9): pointer to the real part of the NCV RITZ values of the c original system. -c IPNTR(10): pointer to the imaginary part of the NCV RITZ values of +c IPNTR(10): pointer to the imaginary part of the NCV RITZ values of c the original system. c IPNTR(11): pointer to the NCV corresponding error bounds. c IPNTR(12): pointer to the NCV by NCV upper quasi-triangular c Schur matrix for H. c IPNTR(13): pointer to the NCV by NCV matrix of eigenvectors c of the upper Hessenberg matrix H. Only referenced by -c dneupd if RVEC = .TRUE. See Remark 2 below. +c dneupd if RVEC = .TRUE. See Remark 2 below. c ------------------------------------------------------------- -c -c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION) +c +c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The user should not use WORKD +c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration. Upon termination c WORKD(1:N) contains B*RESID(1:N). If an invariant subspace c associated with the converged Ritz values is desired, see remark -c 2 below, subroutine dneupd uses this output. -c See Data Distribution Note below. +c 2 below, subroutine dneupd uses this output. +c See Data Distribution Note below. c -c WORKL Double precision work array of length LWORKL. (OUTPUT/WORKSPACE) +c WORKL Double precision work array of length LWORKL. (OUTPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. See Data Distribution Note below. c @@ -254,18 +254,18 @@ c Error flag on output. c = 0: Normal exit. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. IPARAM(5) +c All possible eigenvalues of OP has been found. IPARAM(5) c returns the number of wanted converged Ritz values. c = 2: No longer an informational error. Deprecated starting c with release 2 of ARPACK. -c = 3: No shifts could be applied during a cycle of the -c Implicitly restarted Arnoldi iteration. One possibility -c is to increase the size of NCV relative to NEV. +c = 3: No shifts could be applied during a cycle of the +c Implicitly restarted Arnoldi iteration. One possibility +c is to increase the size of NCV relative to NEV. c See remark 4 below. c = -1: N must be positive. c = -2: NEV must be positive. c = -3: NCV-NEV >= 2 and less than or equal to N. -c = -4: The maximum number of Arnoldi update iteration +c = -4: The maximum number of Arnoldi update iteration c must be greater than zero. c = -5: WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI' c = -6: BMAT must be one of 'I' or 'G'. @@ -273,7 +273,7 @@ c = -8: Error return from LAPACK eigenvalue calculation; c = -9: Starting vector is zero. c = -10: IPARAM(7) must be 1,2,3,4. -c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatable. +c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatible. c = -12: IPARAM(1) must be equal to 0 or 1. c = -9999: Could not build an Arnoldi factorization. c IPARAM(5) returns the size of the current Arnoldi @@ -283,33 +283,33 @@ c 1. The computed Ritz values are approximate eigenvalues of OP. The c selection of WHICH should be made with this in mind when c Mode = 3 and 4. After convergence, approximate eigenvalues of the -c original problem may be obtained with the ARPACK subroutine dneupd. +c original problem may be obtained with the ARPACK subroutine dneupd . c -c 2. If a basis for the invariant subspace corresponding to the converged Ritz -c values is needed, the user must call dneupd immediately following -c completion of dnaupd. This is new starting with release 2 of ARPACK. +c 2. If a basis for the invariant subspace corresponding to the converged Ritz +c values is needed, the user must call dneupd immediately following +c completion of dnaupd . This is new starting with release 2 of ARPACK. c c 3. If M can be factored into a Cholesky factorization M = LL` c then Mode = 2 should not be selected. Instead one should use -c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular +c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular c linear systems should be solved with L and L` rather c than computing inverses. After convergence, an approximate c eigenvector z of the original problem is recovered by solving c L`z = x where x is a Ritz vector of OP. c c 4. At present there is no a-priori analysis to guide the selection -c of NCV relative to NEV. The only formal requrement is that NCV > NEV + 2. +c of NCV relative to NEV. The only formal requirement is that NCV > NEV + 2. c However, it is recommended that NCV .ge. 2*NEV+1. If many problems of c the same type are to be solved, one should experiment with increasing -c NCV while keeping NEV fixed for a given test problem. This will +c NCV while keeping NEV fixed for a given test problem. This will c usually decrease the required number of OP*x operations but it c also increases the work and storage required to maintain the orthogonal c basis vectors. The optimal "cross-over" with respect to CPU time -c is problem dependent and must be determined empirically. +c is problem dependent and must be determined empirically. c See Chapter 8 of Reference 2 for further information. c -c 5. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the -c NP = IPARAM(8) real and imaginary parts of the shifts in locations +c 5. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the +c NP = IPARAM(8) real and imaginary parts of the shifts in locations c real part imaginary part c ----------------------- -------------- c 1 WORKL(IPNTR(14)) WORKL(IPNTR(14)+NP) @@ -319,10 +319,10 @@ c . . c NP WORKL(IPNTR(14)+NP-1) WORKL(IPNTR(14)+2*NP-1). c -c Only complex conjugate pairs of shifts may be applied and the pairs -c must be placed in consecutive locations. The real part of the -c eigenvalues of the current upper Hessenberg matrix are located in -c WORKL(IPNTR(6)) through WORKL(IPNTR(6)+NCV-1) and the imaginary part +c Only complex conjugate pairs of shifts may be applied and the pairs +c must be placed in consecutive locations. The real part of the +c eigenvalues of the current upper Hessenberg matrix are located in +c WORKL(IPNTR(6)) through WORKL(IPNTR(6)+NCV-1) and the imaginary part c in WORKL(IPNTR(7)) through WORKL(IPNTR(7)+NCV-1). They are ordered c according to the order defined by WHICH. The complex conjugate c pairs are kept together and the associated Ritz estimates are located in @@ -330,11 +330,11 @@ c c----------------------------------------------------------------------- c -c\Data Distribution Note: +c\Data Distribution Note: c c Fortran-D syntax: c ================ -c Double precision resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) +c Double precision resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) c decompose d1(n), d2(n,ncv) c align resid(i) with d1(i) c align v(i,j) with d2(i,j) @@ -346,13 +346,13 @@ c c Cray MPP syntax: c =============== -c Double precision resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) +c Double precision resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) c shared resid(block), v(block,:), workd(block,:) c replicated workl(lworkl) -c +c c CM2/CM5 syntax: c ============== -c +c c----------------------------------------------------------------------- c c include 'ex-nonsym.doc' @@ -368,7 +368,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett & Y. Saad, "Complex Shift and Invert Strategies for @@ -376,26 +376,26 @@ c pp 575-595, (1987). c c\Routines called: -c dnaup2 ARPACK routine that implements the Implicitly Restarted +c dnaup2 ARPACK routine that implements the Implicitly Restarted c Arnoldi Iteration. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. -c dvout ARPACK utility routine that prints vectors. -c dlamch LAPACK routine that determines machine constants. +c arscnd ARPACK utility routine for timing. +c dvout ARPACK utility routine that prints vectors. +c dlamch LAPACK routine that determines machine constants. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c 12/16/93: Version '1.1' c -c\SCCS Information: @(#) -c FILE: naupd.F SID: 2.10 DATE OF SID: 08/23/02 RELEASE: 2 +c\SCCS Information: @(#) +c FILE: naupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 c c\Remarks c @@ -404,15 +404,15 @@ c----------------------------------------------------------------------- c subroutine dnaupd - & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, + & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, & ipntr, workd, workl, lworkl, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -437,13 +437,13 @@ subroutine dnaupd c Double precision & one, zero - parameter (one = 1.0D+0, zero = 0.0D+0) + parameter (one = 1.0D+0 , zero = 0.0D+0 ) c c %---------------% c | Local Scalars | c %---------------% c - integer bounds, ierr, ih, iq, ishift, iupd, iw, + integer bounds, ierr, ih, iq, ishift, iupd, iw, & ldh, ldq, levec, mode, msglvl, mxiter, nb, & nev0, next, np, ritzi, ritzr, j save bounds, ih, iq, ishift, iupd, iw, ldh, ldq, @@ -454,7 +454,7 @@ subroutine dnaupd c | External Subroutines | c %----------------------% c - external dnaup2, dvout, ivout, second, dstatn + external dnaup2 , dvout , ivout, arscnd, dstatn c c %--------------------% c | External Functions | @@ -467,16 +467,16 @@ subroutine dnaupd c %-----------------------% c | Executable Statements | c %-----------------------% -c +c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c call dstatn - call second (t0) + call arscnd (t0) msglvl = mnaupd c c %----------------% @@ -498,13 +498,13 @@ subroutine dnaupd mode = iparam(7) c if (n .le. 0) then - ierr = -1 + ierr = -1 else if (nev .le. 0) then - ierr = -2 + ierr = -2 else if (ncv .le. nev+1 .or. ncv .gt. n) then - ierr = -3 - else if (mxiter .le. 0) then - ierr = 4 + ierr = -3 + else if (mxiter .le. 0) then + ierr = -4 else if (which .ne. 'LM' .and. & which .ne. 'SM' .and. & which .ne. 'LR' .and. @@ -517,13 +517,13 @@ subroutine dnaupd else if (lworkl .lt. 3*ncv**2 + 6*ncv) then ierr = -7 else if (mode .lt. 1 .or. mode .gt. 4) then - ierr = -10 + ierr = -10 else if (mode .eq. 1 .and. bmat .eq. 'G') then - ierr = -11 + ierr = -11 else if (ishift .lt. 0 .or. ishift .gt. 1) then - ierr = -12 + ierr = -12 end if -c +c c %------------% c | Error Exit | c %------------% @@ -533,13 +533,13 @@ subroutine dnaupd ido = 99 go to 9000 end if -c +c c %------------------------% c | Set default parameters | c %------------------------% c if (nb .le. 0) nb = 1 - if (tol .le. zero) tol = dlamch('EpsMach') + if (tol .le. zero) tol = dlamch ('EpsMach') c c %----------------------------------------------% c | NP is the number of additional steps to | @@ -549,8 +549,8 @@ subroutine dnaupd c %----------------------------------------------% c np = ncv - nev - nev0 = nev -c + nev0 = nev +c c %-----------------------------% c | Zero out internal workspace | c %-----------------------------% @@ -558,7 +558,7 @@ subroutine dnaupd do 10 j = 1, 3*ncv**2 + 6*ncv workl(j) = zero 10 continue -c +c c %-------------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -570,8 +570,8 @@ subroutine dnaupd c | workl(ncv*ncv+2*ncv+1:ncv*ncv+3*ncv) := error bounds | c | workl(ncv*ncv+3*ncv+1:2*ncv*ncv+3*ncv) := rotation matrix Q | c | workl(2*ncv*ncv+3*ncv+1:3*ncv*ncv+6*ncv) := workspace | -c | The final workspace is needed by subroutine dneigh called | -c | by dnaup2. Subroutine dneigh calls LAPACK routines for | +c | The final workspace is needed by subroutine dneigh called | +c | by dnaup2 . Subroutine dneigh calls LAPACK routines for | c | calculating eigenvalues and the last row of the eigenvector | c | matrix. | c %-------------------------------------------------------------% @@ -591,7 +591,7 @@ subroutine dnaupd ipntr(6) = ritzr ipntr(7) = ritzi ipntr(8) = bounds - ipntr(14) = iw + ipntr(14) = iw c end if c @@ -599,12 +599,12 @@ subroutine dnaupd c | Carry out the Implicitly restarted Arnoldi Iteration. | c %-------------------------------------------------------% c - call dnaup2 + call dnaup2 & ( ido, bmat, n, which, nev0, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritzr), - & workl(ritzi), workl(bounds), workl(iq), ldq, workl(iw), + & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritzr), + & workl(ritzi), workl(bounds), workl(iq), ldq, workl(iw), & ipntr, workd, info ) -c +c c %--------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP or shifts. | @@ -612,7 +612,7 @@ subroutine dnaupd c if (ido .eq. 3) iparam(8) = np if (ido .ne. 99) go to 9000 -c +c iparam(3) = mxiter iparam(5) = np iparam(9) = nopx @@ -621,26 +621,26 @@ subroutine dnaupd c c %------------------------------------% c | Exit if there was an informational | -c | error within dnaup2. | +c | error within dnaup2 . | c %------------------------------------% c if (info .lt. 0) go to 9000 if (info .eq. 2) info = 3 c if (msglvl .gt. 0) then - call ivout (logfil, 1, mxiter, ndigit, + call ivout (logfil, 1, [mxiter], ndigit, & '_naupd: Number of update iterations taken') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naupd: Number of wanted "converged" Ritz values') - call dvout (logfil, np, workl(ritzr), ndigit, + call dvout (logfil, np, workl(ritzr), ndigit, & '_naupd: Real part of the final Ritz values') - call dvout (logfil, np, workl(ritzi), ndigit, + call dvout (logfil, np, workl(ritzi), ndigit, & '_naupd: Imaginary part of the final Ritz values') - call dvout (logfil, np, workl(bounds), ndigit, + call dvout (logfil, np, workl(bounds), ndigit, & '_naupd: Associated Ritz estimates') end if c - call second (t1) + call arscnd (t1) tnaupd = t1 - t0 c if (msglvl .gt. 0) then @@ -656,8 +656,8 @@ subroutine dnaupd 1000 format (//, & 5x, '=============================================',/ & 5x, '= Nonsymmetric implicit Arnoldi update code =',/ - & 5x, '= Version Number: ', ' 2.4', 21x, ' =',/ - & 5x, '= Version Date: ', ' 07/31/96', 16x, ' =',/ + & 5x, '= Version Number: ', ' 2.4' , 21x, ' =',/ + & 5x, '= Version Date: ', ' 07/31/96' , 16x, ' =',/ & 5x, '=============================================',/ & 5x, '= Summary of timing statistics =',/ & 5x, '=============================================',//) @@ -687,7 +687,7 @@ subroutine dnaupd return c c %---------------% -c | End of dnaupd | +c | End of dnaupd | c %---------------% c end diff --git a/Toolbox/arpack-src/dnconv.f b/Toolbox/arpack-src/dnconv.f index 56fbd2e79..4d531f865 100644 --- a/Toolbox/arpack-src/dnconv.f +++ b/Toolbox/arpack-src/dnconv.f @@ -3,7 +3,7 @@ c c\Name: dnconv c -c\Description: +c\Description: c Convergence testing for the nonsymmetric Arnoldi eigenvalue routine. c c\Usage: @@ -38,22 +38,22 @@ c xxxxxx real c c\Routines called: -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dlamch LAPACK routine that determines machine constants. c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University +c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: nconv.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\Remarks @@ -69,8 +69,8 @@ subroutine dnconv (n, ritzr, ritzi, bounds, tol, nconv) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -106,7 +106,7 @@ subroutine dnconv (n, ritzr, ritzi, bounds, tol, nconv) c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %-------------------------------------------------------------% c | Convergence test: unlike in the symmetric code, I am not | c | using things like refined error bounds and gap condition | @@ -119,7 +119,7 @@ subroutine dnconv (n, ritzr, ritzi, bounds, tol, nconv) c | for some appropriate choice of norm. | c %-------------------------------------------------------------% c - call second (t0) + call arscnd (t0) c c %---------------------------------% c | Get machine dependent constant. | @@ -133,10 +133,10 @@ subroutine dnconv (n, ritzr, ritzi, bounds, tol, nconv) temp = max( eps23, dlapy2( ritzr(i), ritzi(i) ) ) if (bounds(i) .le. tol*temp) nconv = nconv + 1 20 continue -c - call second (t1) +c + call arscnd (t1) tnconv = tnconv + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/dneigh.f b/Toolbox/arpack-src/dneigh.f index 949e777a8..3c49e32bf 100644 --- a/Toolbox/arpack-src/dneigh.f +++ b/Toolbox/arpack-src/dneigh.f @@ -13,7 +13,7 @@ c c\Arguments c RNORM Double precision scalar. (INPUT) -c Residual norm corresponding to the current upper Hessenberg +c Residual norm corresponding to the current upper Hessenberg c matrix H. c c N Integer. (INPUT) @@ -27,13 +27,13 @@ c program. c c RITZR, Double precision arrays of length N. (OUTPUT) -c RITZI On output, RITZR(1:N) (resp. RITZI(1:N)) contains the real +c RITZI On output, RITZR(1:N) (resp. RITZI(1:N)) contains the real c (respectively imaginary) parts of the eigenvalues of H. c c BOUNDS Double precision array of length N. (OUTPUT) c On output, BOUNDS contains the Ritz estimates associated with -c the eigenvalues RITZR and RITZI. This is equal to RNORM -c times the last components of the eigenvectors corresponding +c the eigenvalues RITZR and RITZI. This is equal to RNORM +c times the last components of the eigenvectors corresponding c to the eigenvalues in RITZR and RITZI. c c Q Double precision N by N array. (WORKSPACE) @@ -49,7 +49,7 @@ c of H and also in the calculation of the eigenvectors of H. c c IERR Integer. (OUTPUT) -c Error exit flag from dlaqrb or dtrevc. +c Error exit flag from dlahqr or dtrevc. c c\EndDoc c @@ -61,9 +61,9 @@ c xxxxxx real c c\Routines called: -c dlaqrb ARPACK routine to compute the real Schur form of an +c dlahqr LAPACK routine to compute the real Schur form of an c upper Hessenberg matrix and last row of the Schur vectors. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dmout ARPACK utility routine that prints matrices c dvout ARPACK utility routine that prints vectors. c dlacpy LAPACK matrix copy routine. @@ -74,20 +74,20 @@ c dcopy Level 1 BLAS that copies one vector to another . c dnrm2 Level 1 BLAS that computes the norm of a vector. c dscal Level 1 BLAS that scales a vector. -c +c c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: neigh.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\Remarks @@ -97,54 +97,54 @@ c c----------------------------------------------------------------------- c - subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, + subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, & q, ldq, workl, ierr) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | c %------------------% c integer ierr, n, ldh, ldq - Double precision + Double precision & rnorm c c %-----------------% c | Array Arguments | c %-----------------% c - Double precision + Double precision & bounds(n), h(ldh,n), q(ldq,n), ritzi(n), ritzr(n), & workl(n*(n+3)) -c +c c %------------% c | Parameters | c %------------% c - Double precision + Double precision & one, zero parameter (one = 1.0D+0, zero = 0.0D+0) -c +c c %------------------------% c | Local Scalars & Arrays | c %------------------------% c logical select(1) integer i, iconj, msglvl - Double precision + Double precision & temp, vl(1) c c %----------------------% c | External Subroutines | c %----------------------% c - external dcopy, dlacpy, dlaqrb, dtrevc, dvout, second + external dcopy, dlacpy, dlahqr, dtrevc, dvout, arscnd c c %--------------------% c | External Functions | @@ -170,25 +170,29 @@ subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mneigh -c +c if (msglvl .gt. 2) then - call dmout (logfil, n, n, h, ldh, ndigit, + call dmout (logfil, n, n, h, ldh, ndigit, & '_neigh: Entering upper Hessenberg matrix H ') end if -c +c c %-----------------------------------------------------------% c | 1. Compute the eigenvalues, the last components of the | c | corresponding Schur vectors and the full Schur form T | c | of the current upper Hessenberg matrix H. | -c | dlaqrb returns the full Schur form of H in WORKL(1:N**2) | +c | dlahqr returns the full Schur form of H in WORKL(1:N**2) | c | and the last components of the Schur vectors in BOUNDS. | c %-----------------------------------------------------------% c call dlacpy ('All', n, n, h, ldh, workl, n) - call dlaqrb (.true., n, 1, n, workl, n, ritzr, ritzi, bounds, - & ierr) + do 5 j = 1, n-1 + bounds(j) = zero + 5 continue + bounds(n) = 1 + call dlahqr(.true., .true., n, 1, n, workl, n, ritzr, ritzi, 1, 1, + & bounds, 1, ierr) if (ierr .ne. 0) go to 9000 c if (msglvl .gt. 1) then @@ -227,7 +231,7 @@ subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c %----------------------% c | Real eigenvalue case | c %----------------------% -c +c temp = dnrm2( n, q(1,i), 1 ) call dscal ( n, one / temp, q(1,i), 1 ) else @@ -241,7 +245,7 @@ subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c %-------------------------------------------% c if (iconj .eq. 0) then - temp = dlapy2( dnrm2( n, q(1,i), 1 ), + temp = dlapy2( dnrm2( n, q(1,i), 1 ), & dnrm2( n, q(1,i+1), 1 ) ) call dscal ( n, one / temp, q(1,i), 1 ) call dscal ( n, one / temp, q(1,i+1), 1 ) @@ -249,7 +253,7 @@ subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, else iconj = 0 end if - end if + end if 10 continue c call dgemv ('T', n, n, one, q, ldq, bounds, 1, zero, workl, 1) @@ -270,7 +274,7 @@ subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c %----------------------% c | Real eigenvalue case | c %----------------------% -c +c bounds(i) = rnorm * abs( workl(i) ) else c @@ -301,7 +305,7 @@ subroutine dneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, & '_neigh: Ritz estimates for the eigenvalues of H') end if c - call second (t1) + call arscnd (t1) tneigh = tneigh + (t1 - t0) c 9000 continue diff --git a/Toolbox/arpack-src/dneupd.f b/Toolbox/arpack-src/dneupd.f index cdb163c81..860ceb856 100644 --- a/Toolbox/arpack-src/dneupd.f +++ b/Toolbox/arpack-src/dneupd.f @@ -1,8 +1,8 @@ c\BeginDoc c -c\Name: dneupd +c\Name: dneupd c -c\Description: +c\Description: c c This subroutine returns the converged approximations to eigenvalues c of A*z = lambda*B*z and (optionally): @@ -28,34 +28,34 @@ c invariant subspace corresponding to these Ritz values is referred to as a c Schur basis. c -c See documentation in the header of the subroutine DNAUPD for +c See documentation in the header of the subroutine DNAUPD for c definition of OP as well as other terms and the relation of computed c Ritz values and Ritz vectors of OP with respect to the given problem -c A*z = lambda*B*z. For a brief description, see definitions of +c A*z = lambda*B*z. For a brief description, see definitions of c IPARAM(7), MODE and WHICH in the documentation of DNAUPD . c c\Usage: -c call dneupd -c ( RVEC, HOWMNY, SELECT, DR, DI, Z, LDZ, SIGMAR, SIGMAI, WORKEV, BMAT, -c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, WORKL, +c call dneupd +c ( RVEC, HOWMNY, SELECT, DR, DI, Z, LDZ, SIGMAR, SIGMAI, WORKEV, BMAT, +c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, WORKL, c LWORKL, INFO ) c c\Arguments: -c RVEC LOGICAL (INPUT) -c Specifies whether a basis for the invariant subspace corresponding -c to the converged Ritz value approximations for the eigenproblem +c RVEC LOGICAL (INPUT) +c Specifies whether a basis for the invariant subspace corresponding +c to the converged Ritz value approximations for the eigenproblem c A*z = lambda*B*z is computed. c c RVEC = .FALSE. Compute Ritz values only. c c RVEC = .TRUE. Compute the Ritz vectors or Schur vectors. -c See Remarks below. -c -c HOWMNY Character*1 (INPUT) -c Specifies the form of the basis for the invariant subspace +c See Remarks below. +c +c HOWMNY Character*1 (INPUT) +c Specifies the form of the basis for the invariant subspace c corresponding to the converged Ritz values that is to be computed. c -c = 'A': Compute NEV Ritz vectors; +c = 'A': Compute NEV Ritz vectors; c = 'P': Compute NEV Schur vectors; c = 'S': compute some of the Ritz vectors, specified c by the logical array SELECT. @@ -63,43 +63,43 @@ c SELECT Logical array of dimension NCV. (INPUT) c If HOWMNY = 'S', SELECT specifies the Ritz vectors to be c computed. To select the Ritz vector corresponding to a -c Ritz value (DR(j), DI(j)), SELECT(j) must be set to .TRUE.. +c Ritz value (DR(j), DI(j)), SELECT(j) must be set to .TRUE.. c If HOWMNY = 'A' or 'P', SELECT is used as internal workspace. c c DR Double precision array of dimension NEV+1. (OUTPUT) -c If IPARAM(7) = 1,2 or 3 and SIGMAI=0.0 then on exit: DR contains -c the real part of the Ritz approximations to the eigenvalues of -c A*z = lambda*B*z. +c If IPARAM(7) = 1,2 or 3 and SIGMAI=0.0 then on exit: DR contains +c the real part of the Ritz approximations to the eigenvalues of +c A*z = lambda*B*z. c If IPARAM(7) = 3, 4 and SIGMAI is not equal to zero, then on exit: -c DR contains the real part of the Ritz values of OP computed by +c DR contains the real part of the Ritz values of OP computed by c DNAUPD . A further computation must be performed by the user c to transform the Ritz values computed for OP by DNAUPD to those c of the original system A*z = lambda*B*z. See remark 3 below. c c DI Double precision array of dimension NEV+1. (OUTPUT) -c On exit, DI contains the imaginary part of the Ritz value +c On exit, DI contains the imaginary part of the Ritz value c approximations to the eigenvalues of A*z = lambda*B*z associated c with DR. c -c NOTE: When Ritz values are complex, they will come in complex -c conjugate pairs. If eigenvectors are requested, the -c corresponding Ritz vectors will also come in conjugate -c pairs and the real and imaginary parts of these are -c represented in two consecutive columns of the array Z +c NOTE: When Ritz values are complex, they will come in complex +c conjugate pairs. If eigenvectors are requested, the +c corresponding Ritz vectors will also come in conjugate +c pairs and the real and imaginary parts of these are +c represented in two consecutive columns of the array Z c (see below). c c Z Double precision N by NEV+1 array if RVEC = .TRUE. and HOWMNY = 'A'. (OUTPUT) -c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of -c Z represent approximate eigenvectors (Ritz vectors) corresponding -c to the NCONV=IPARAM(5) Ritz values for eigensystem -c A*z = lambda*B*z. -c -c The complex Ritz vector associated with the Ritz value -c with positive imaginary part is stored in two consecutive -c columns. The first column holds the real part of the Ritz -c vector and the second column holds the imaginary part. The -c Ritz vector associated with the Ritz value with negative -c imaginary part is simply the complex conjugate of the Ritz vector +c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of +c Z represent approximate eigenvectors (Ritz vectors) corresponding +c to the NCONV=IPARAM(5) Ritz values for eigensystem +c A*z = lambda*B*z. +c +c The complex Ritz vector associated with the Ritz value +c with positive imaginary part is stored in two consecutive +c columns. The first column holds the real part of the Ritz +c vector and the second column holds the imaginary part. The +c Ritz vector associated with the Ritz value with negative +c imaginary part is simply the complex conjugate of the Ritz vector c associated with the positive imaginary part. c c If RVEC = .FALSE. or HOWMNY = 'P', then Z is not referenced. @@ -114,11 +114,11 @@ c desired, then LDZ >= max( 1, N ). In any case, LDZ >= 1. c c SIGMAR Double precision (INPUT) -c If IPARAM(7) = 3 or 4, represents the real part of the shift. +c If IPARAM(7) = 3 or 4, represents the real part of the shift. c Not referenced if IPARAM(7) = 1 or 2. c c SIGMAI Double precision (INPUT) -c If IPARAM(7) = 3 or 4, represents the imaginary part of the shift. +c If IPARAM(7) = 3 or 4, represents the imaginary part of the shift. c Not referenced if IPARAM(7) = 1 or 2. See remark 3 below. c c WORKEV Double precision work array of dimension 3*NCV. (WORKSPACE) @@ -181,12 +181,12 @@ c c = 0: Normal exit. c -c = 1: The Schur form computed by LAPACK routine dlahqr +c = 1: The Schur form computed by LAPACK routine dlahqr c could not be reordered by LAPACK routine dtrsen . -c Re-enter subroutine dneupd with IPARAM(5)=NCV and -c increase the size of the arrays DR and DI to have -c dimension at least dimension NCV and allocate at least NCV -c columns for Z. NOTE: Not necessary if Z and V share +c Re-enter subroutine dneupd with IPARAM(5)=NCV and +c increase the size of the arrays DR and DI to have +c dimension at least dimension NCV and allocate at least NCV +c columns for Z. NOTE: Not necessary if Z and V share c the same space. Please notify the authors if this error c occurs. c @@ -218,7 +218,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett & Y. Saad, "Complex Shift and Invert Strategies for @@ -229,7 +229,7 @@ c ivout ARPACK utility routine that prints integers. c dmout ARPACK utility routine that prints matrices c dvout ARPACK utility routine that prints vectors. -c dgeqr2 LAPACK routine that computes the QR factorization of +c dgeqr2 LAPACK routine that computes the QR factorization of c a matrix. c dlacpy LAPACK matrix copy routine. c dlahqr LAPACK routine to compute the real Schur form of an @@ -237,7 +237,7 @@ c dlamch LAPACK routine that determines machine constants. c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c dlaset LAPACK matrix initialization routine. -c dorm2r LAPACK routine that applies an orthogonal matrix in +c dorm2r LAPACK routine that applies an orthogonal matrix in c factored form. c dtrevc LAPACK routine to compute the eigenvectors of a matrix c in upper quasi-triangular form. @@ -259,10 +259,10 @@ c Ritz vectors. Thus, their numerical properties are often superior. c If RVEC = .TRUE. then the relationship c A * V(:,1:IPARAM(5)) = V(:,1:IPARAM(5)) * T, and -c trans(V(:,1:IPARAM(5))) * V(:,1:IPARAM(5)) = I are approximately -c satisfied. Here T is the leading submatrix of order IPARAM(5) of the +c trans(V(:,1:IPARAM(5))) * V(:,1:IPARAM(5)) = I are approximately +c satisfied. Here T is the leading submatrix of order IPARAM(5) of the c real upper quasi-triangular matrix stored workl(ipntr(12)). That is, -c T is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +c T is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; c each 2-by-2 diagonal block has its diagonal elements equal and its c off-diagonal elements of opposite sign. Corresponding to each 2-by-2 c diagonal block is a complex conjugate pair of Ritz values. The real @@ -270,14 +270,14 @@ c c 3. If IPARAM(7) = 3 or 4 and SIGMAI is not equal zero, then the user must c form the IPARAM(5) Rayleigh quotients in order to transform the Ritz -c values computed by DNAUPD for OP to those of A*z = lambda*B*z. +c values computed by DNAUPD for OP to those of A*z = lambda*B*z. c Set RVEC = .true. and HOWMNY = 'A', and -c compute +c compute c trans(Z(:,I)) * A * Z(:,I) if DI(I) = 0. -c If DI(I) is not equal to zero and DI(I+1) = - D(I), +c If DI(I) is not equal to zero and DI(I+1) = - D(I), c then the desired real and imaginary parts of the Ritz value are c trans(Z(:,I)) * A * Z(:,I) + trans(Z(:,I+1)) * A * Z(:,I+1), -c trans(Z(:,I)) * A * Z(:,I+1) - trans(Z(:,I+1)) * A * Z(:,I), +c trans(Z(:,I)) * A * Z(:,I+1) - trans(Z(:,I+1)) * A * Z(:,I), c respectively. c Another possibility is to set RVEC = .true. and HOWMNY = 'P' and c compute trans(V(:,1:IPARAM(5))) * A * V(:,1:IPARAM(5)) and then an upper @@ -286,20 +286,20 @@ c c\Authors c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University +c Richard Lehoucq CRPC / Rice University c Chao Yang Houston, Texas c Dept. of Computational & -c Applied Mathematics -c Rice University -c Houston, Texas -c -c\SCCS Information: @(#) -c FILE: neupd.F SID: 2.7 DATE OF SID: 09/20/00 RELEASE: 2 +c Applied Mathematics +c Rice University +c Houston, Texas +c +c\SCCS Information: @(#) +c FILE: neupd.F SID: 2.7 DATE OF SID: 09/20/00 RELEASE: 2 c c\EndLib c c----------------------------------------------------------------------- - subroutine dneupd (rvec , howmny, select, dr , di, + subroutine dneupd (rvec , howmny, select, dr , di, & z , ldz , sigmar, sigmai, workev, & bmat , n , which , nev , tol, & resid, ncv , v , ldv , iparam, @@ -309,8 +309,8 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -319,7 +319,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, character bmat, howmny, which*2 logical rvec integer info, ldz, ldv, lworkl, n, ncv, nev - Double precision + Double precision & sigmar, sigmai, tol c c %-----------------% @@ -328,16 +328,16 @@ subroutine dneupd (rvec , howmny, select, dr , di, c integer iparam(11), ipntr(14) logical select(ncv) - Double precision - & dr(nev+1) , di(nev+1), resid(n) , - & v(ldv,ncv) , z(ldz,*) , workd(3*n), + Double precision + & dr(nev+1) , di(nev+1), resid(n) , + & v(ldv,ncv) , z(ldz,*) , workd(3*n), & workl(lworkl), workev(3*ncv) c c %------------% c | Parameters | c %------------% c - Double precision + Double precision & one, zero parameter (one = 1.0D+0 , zero = 0.0D+0 ) c @@ -346,16 +346,16 @@ subroutine dneupd (rvec , howmny, select, dr , di, c %---------------% c character type*6 - integer bounds, ierr , ih , ihbds , - & iheigr, iheigi, iconj , nconv , + integer bounds, ierr , ih , ihbds , + & iheigr, iheigi, iconj , nconv , & invsub, iuptri, iwev , iwork(1), & j , k , ldh , ldq , & mode , msglvl, outncv, ritzr , & ritzi , wri , wrr , irr , & iri , ibd , ishift, numcnv , - & np , jj + & np , jj , nconv2 logical reord - Double precision + Double precision & conds , rnorm, sep , temp, & vl(1,1), temp1, eps23 c @@ -363,18 +363,18 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | External Subroutines | c %----------------------% c - external dcopy , dger , dgeqr2 , dlacpy , - & dlahqr , dlaset , dmout , dorm2r , - & dtrevc , dtrmm , dtrsen , dscal , + external dcopy , dger , dgeqr2 , dlacpy , + & dlahqr , dlaset , dmout , dorm2r , + & dtrevc , dtrmm , dtrsen , dscal , & dvout , ivout c c %--------------------% c | External Functions | c %--------------------% c - Double precision - & dlapy2 , dnrm2 , dlamch , ddot - external dlapy2 , dnrm2 , dlamch , ddot + Double precision + & dlapy2 , dnrm2 , dlamch , ddot + external dlapy2 , dnrm2 , dlamch , ddot c c %---------------------% c | Intrinsic Functions | @@ -385,7 +385,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -434,7 +434,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, else if (howmny .eq. 'S' ) then ierr = -12 end if -c +c if (mode .eq. 1 .or. mode .eq. 2) then type = 'REGULR' else if (mode .eq. 3 .and. sigmai .eq. zero) then @@ -443,7 +443,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, type = 'REALPT' else if (mode .eq. 4 ) then type = 'IMAGPT' - else + else ierr = -10 end if if (mode .eq. 1 .and. bmat .eq. 'G') ierr = -11 @@ -456,7 +456,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, info = ierr go to 9000 end if -c +c c %--------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -483,7 +483,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | subspace for H. | c | GRAND total of NCV * ( 3 * NCV + 6 ) locations. | c %-----------------------------------------------------------% -c +c ih = ipntr(5) ritzr = ipntr(6) ritzi = ipntr(7) @@ -537,7 +537,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, end if c if (rvec) then -c +c reord = .false. c c %---------------------------------------------------% @@ -562,7 +562,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, c np = ncv - nev ishift = 0 - call dngets (ishift , which , nev , + call dngets (ishift , which , nev , & np , workl(irr), workl(iri), & workl(bounds), workl , workl(np+1)) c @@ -589,7 +589,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, & workl(ibd+jj-1) .le. tol*temp1) then select(jj) = .true. numcnv = numcnv + 1 - if (jj .gt. nev) reord = .true. + if (jj .gt. nconv) reord = .true. endif 11 continue c @@ -601,9 +601,9 @@ subroutine dneupd (rvec , howmny, select, dr , di, c %-----------------------------------------------------------% c if (msglvl .gt. 2) then - call ivout(logfil, 1, numcnv, ndigit, + call ivout(logfil, 1, [numcnv], ndigit, & '_neupd: Number of specified eigenvalues') - call ivout(logfil, 1, nconv, ndigit, + call ivout(logfil, 1, [nconv], ndigit, & '_neupd: Number of "converged" eigenvalues') end if c @@ -618,24 +618,24 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | Make a copy of the upper Hessenberg matrix. | c | Initialize the Schur vector matrix Q to the identity. | c %-----------------------------------------------------------% -c +c call dcopy (ldh*ncv, workl(ih), 1, workl(iuptri), 1) - call dlaset ('All', ncv, ncv, + call dlaset ('All', ncv, ncv, & zero , one, workl(invsub), & ldq) - call dlahqr (.true., .true. , ncv, - & 1 , ncv , workl(iuptri), + call dlahqr (.true., .true. , ncv, + & 1 , ncv , workl(iuptri), & ldh , workl(iheigr), workl(iheigi), - & 1 , ncv , workl(invsub), + & 1 , ncv , workl(invsub), & ldq , ierr) - call dcopy (ncv , workl(invsub+ncv-1), ldq, + call dcopy (ncv , workl(invsub+ncv-1), ldq, & workl(ihbds), 1) -c +c if (ierr .ne. 0) then info = -8 go to 9000 end if -c +c if (msglvl .gt. 1) then call dvout (logfil, ncv, workl(iheigr), ndigit, & '_neupd: Real part of the eigenvalues of H') @@ -644,45 +644,50 @@ subroutine dneupd (rvec , howmny, select, dr , di, call dvout (logfil, ncv, workl(ihbds), ndigit, & '_neupd: Last row of the Schur vector matrix') if (msglvl .gt. 3) then - call dmout (logfil , ncv, ncv , + call dmout (logfil , ncv, ncv , & workl(iuptri), ldh, ndigit, & '_neupd: The upper quasi-triangular matrix ') end if - end if + end if c if (reord) then -c +c c %-----------------------------------------------------% -c | Reorder the computed upper quasi-triangular matrix. | +c | Reorder the computed upper quasi-triangular matrix. | c %-----------------------------------------------------% -c - call dtrsen ('None' , 'V' , +c + call dtrsen ('None' , 'V' , & select , ncv , - & workl(iuptri), ldh , - & workl(invsub), ldq , - & workl(iheigr), workl(iheigi), - & nconv , conds , - & sep , workl(ihbds) , + & workl(iuptri), ldh , + & workl(invsub), ldq , + & workl(iheigr), workl(iheigi), + & nconv2 , conds , + & sep , workl(ihbds) , & ncv , iwork , & 1 , ierr) c + if (nconv2 .lt. nconv) then + nconv = nconv2 + end if + if (ierr .eq. 1) then info = 1 go to 9000 end if c + if (msglvl .gt. 2) then call dvout (logfil, ncv, workl(iheigr), ndigit, & '_neupd: Real part of the eigenvalues of H--reordered') call dvout (logfil, ncv, workl(iheigi), ndigit, & '_neupd: Imag part of the eigenvalues of H--reordered') if (msglvl .gt. 3) then - call dmout (logfil , ncv, ncv , + call dmout (logfil , ncv, ncv , & workl(iuptri), ldq, ndigit, & '_neupd: Quasi-triangular matrix after re-ordering') end if end if -c +c end if c c %---------------------------------------% @@ -699,23 +704,23 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | if a spectral transformation was not used. | c %----------------------------------------------------% c - if (type .eq. 'REGULR') then + if (type .eq. 'REGULR') then call dcopy (nconv, workl(iheigr), 1, dr, 1) call dcopy (nconv, workl(iheigi), 1, di, 1) end if -c +c c %----------------------------------------------------------% c | Compute the QR factorization of the matrix representing | c | the wanted invariant subspace located in the first NCONV | c | columns of workl(invsub,ldq). | c %----------------------------------------------------------% -c - call dgeqr2 (ncv, nconv , workl(invsub), +c + call dgeqr2 (ncv, nconv , workl(invsub), & ldq, workev, workev(ncv+1), & ierr) c c %---------------------------------------------------------% -c | * Postmultiply V by Q using dorm2r . | +c | * Postmultiply V by Q using dorm2r . | c | * Copy the first NCONV columns of VQ into Z. | c | * Postmultiply Z by R. | c | The N by NCONV matrix Z is now a matrix representation | @@ -725,15 +730,15 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | vectors associated with the real upper quasi-triangular | c | matrix of order NCONV in workl(iuptri) | c %---------------------------------------------------------% -c - call dorm2r ('Right', 'Notranspose', n , +c + call dorm2r ('Right', 'Notranspose', n , & ncv , nconv , workl(invsub), - & ldq , workev , v , + & ldq , workev , v , & ldv , workd(n+1) , ierr) call dlacpy ('All', n, nconv, v, ldv, z, ldz) c do 20 j=1, nconv -c +c c %---------------------------------------------------% c | Perform both a column and row scaling if the | c | diagonal element of workl(invsub,ldq) is negative | @@ -742,21 +747,21 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | Note that since Q is orthogonal, R is a diagonal | c | matrix consisting of plus or minus ones | c %---------------------------------------------------% -c +c if (workl(invsub+(j-1)*ldq+j-1) .lt. zero) then call dscal (nconv, -one, workl(iuptri+j-1), ldq) call dscal (nconv, -one, workl(iuptri+(j-1)*ldq), 1) end if -c +c 20 continue -c +c if (howmny .eq. 'A') then -c +c c %--------------------------------------------% -c | Compute the NCONV wanted eigenvectors of T | +c | Compute the NCONV wanted eigenvectors of T | c | located in workl(iuptri,ldq). | c %--------------------------------------------% -c +c do 30 j=1, ncv if (j .le. nconv) then select(j) = .true. @@ -765,8 +770,8 @@ subroutine dneupd (rvec , howmny, select, dr , di, end if 30 continue c - call dtrevc ('Right', 'Select' , select , - & ncv , workl(iuptri), ldq , + call dtrevc ('Right', 'Select' , select , + & ncv , workl(iuptri), ldq , & vl , 1 , workl(invsub), & ldq , ncv , outncv , & workev , ierr) @@ -775,7 +780,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, info = -9 go to 9000 end if -c +c c %------------------------------------------------% c | Scale the returning eigenvectors so that their | c | Euclidean norms are all one. LAPACK subroutine | @@ -783,22 +788,22 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | that the element of largest magnitude has | c | magnitude 1; | c %------------------------------------------------% -c +c iconj = 0 do 40 j=1, nconv c if ( workl(iheigi+j-1) .eq. zero ) then -c +c c %----------------------% c | real eigenvalue case | c %----------------------% -c +c temp = dnrm2 ( ncv, workl(invsub+(j-1)*ldq), 1 ) - call dscal ( ncv, one / temp, + call dscal ( ncv, one / temp, & workl(invsub+(j-1)*ldq), 1 ) c else -c +c c %-------------------------------------------% c | Complex conjugate pair case. Note that | c | since the real and imaginary part of | @@ -808,15 +813,15 @@ subroutine dneupd (rvec , howmny, select, dr , di, c %-------------------------------------------% c if (iconj .eq. 0) then - temp = dlapy2 (dnrm2 (ncv, - & workl(invsub+(j-1)*ldq), + temp = dlapy2 (dnrm2 (ncv, + & workl(invsub+(j-1)*ldq), & 1), - & dnrm2 (ncv, + & dnrm2 (ncv, & workl(invsub+j*ldq), - & 1)) - call dscal (ncv, one/temp, + & 1)) + call dscal (ncv, one/temp, & workl(invsub+(j-1)*ldq), 1 ) - call dscal (ncv, one/temp, + call dscal (ncv, one/temp, & workl(invsub+j*ldq), 1 ) iconj = 1 else @@ -856,7 +861,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, call dvout (logfil, ncv, workl(ihbds), ndigit, & '_neupd: Last row of the eigenvector matrix for T') if (msglvl .gt. 3) then - call dmout (logfil, ncv, ncv, workl(invsub), ldq, + call dmout (logfil, ncv, ncv, workl(invsub), ldq, & ndigit, '_neupd: The eigenvector matrix for T') end if end if @@ -872,32 +877,32 @@ subroutine dneupd (rvec , howmny, select, dr , di, c | associated with leading portion of T in the first NCONV | c | columns of workl(invsub,ldq). | c %---------------------------------------------------------% -c - call dgeqr2 (ncv, nconv , workl(invsub), +c + call dgeqr2 (ncv, nconv , workl(invsub), & ldq, workev, workev(ncv+1), & ierr) -c +c c %----------------------------------------------% -c | * Postmultiply Z by Q. | +c | * Postmultiply Z by Q. | c | * Postmultiply Z by R. | -c | The N by NCONV matrix Z is now contains the | +c | The N by NCONV matrix Z is now contains the | c | Ritz vectors associated with the Ritz values | c | in workl(iheigr) and workl(iheigi). | c %----------------------------------------------% -c +c call dorm2r ('Right', 'Notranspose', n , & ncv , nconv , workl(invsub), & ldq , workev , z , & ldz , workd(n+1) , ierr) -c +c call dtrmm ('Right' , 'Upper' , 'No transpose', & 'Non-unit', n , nconv , & one , workl(invsub), ldq , & z , ldz) -c +c end if -c - else +c + else c c %------------------------------------------------------% c | An approximate invariant subspace is not needed. | @@ -910,7 +915,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, call dcopy (nconv, workl(ritzi), 1, workl(iheigi), 1) call dcopy (nconv, workl(bounds), 1, workl(ihbds), 1) end if -c +c c %------------------------------------------------% c | Transform the Ritz values and possibly vectors | c | and corresponding error bounds of OP to those | @@ -919,26 +924,26 @@ subroutine dneupd (rvec , howmny, select, dr , di, c if (type .eq. 'REGULR') then c - if (rvec) - & call dscal (ncv, rnorm, workl(ihbds), 1) -c - else -c + if (rvec) + & call dscal (ncv, rnorm, workl(ihbds), 1) +c + else +c c %---------------------------------------% c | A spectral transformation was used. | c | * Determine the Ritz estimates of the | c | Ritz values in the original system. | c %---------------------------------------% -c +c if (type .eq. 'SHIFTI') then c - if (rvec) + if (rvec) & call dscal (ncv, rnorm, workl(ihbds), 1) c do 50 k=1, ncv - temp = dlapy2 ( workl(iheigr+k-1), + temp = dlapy2 ( workl(iheigr+k-1), & workl(iheigi+k-1) ) - workl(ihbds+k-1) = abs( workl(ihbds+k-1) ) + workl(ihbds+k-1) = abs( workl(ihbds+k-1) ) & / temp / temp 50 continue c @@ -953,26 +958,26 @@ subroutine dneupd (rvec , howmny, select, dr , di, 70 continue c end if -c +c c %-----------------------------------------------------------% c | * Transform the Ritz values back to the original system. | c | For TYPE = 'SHIFTI' the transformation is | c | lambda = 1/theta + sigma | c | For TYPE = 'REALPT' or 'IMAGPT' the user must from | -c | Rayleigh quotients or a projection. See remark 3 above.| +c | Rayleigh quotients or a projection. See remark 3 above.| c | NOTES: | c | *The Ritz vectors are not affected by the transformation. | c %-----------------------------------------------------------% -c - if (type .eq. 'SHIFTI') then +c + if (type .eq. 'SHIFTI') then c do 80 k=1, ncv - temp = dlapy2 ( workl(iheigr+k-1), + temp = dlapy2 ( workl(iheigr+k-1), & workl(iheigi+k-1) ) - workl(iheigr+k-1) = workl(iheigr+k-1)/temp/temp - & + sigmar + workl(iheigr+k-1) = workl(iheigr+k-1)/temp/temp + & + sigmar workl(iheigi+k-1) = -workl(iheigi+k-1)/temp/temp - & + sigmai + & + sigmai 80 continue c call dcopy (nconv, workl(iheigr), 1, dr, 1) @@ -989,9 +994,9 @@ subroutine dneupd (rvec , howmny, select, dr , di, c if (type .eq. 'SHIFTI' .and. msglvl .gt. 1) then call dvout (logfil, nconv, dr, ndigit, - & '_neupd: Untransformed real part of the Ritz valuess.') + & '_neupd: Untransformed real part of the Ritz values.') call dvout (logfil, nconv, di, ndigit, - & '_neupd: Untransformed imag part of the Ritz valuess.') + & '_neupd: Untransformed imag part of the Ritz values.') call dvout (logfil, nconv, workl(ihbds), ndigit, & '_neupd: Ritz estimates of untransformed Ritz values.') else if (type .eq. 'REGULR' .and. msglvl .gt. 1) then @@ -1002,7 +1007,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, call dvout (logfil, nconv, workl(ihbds), ndigit, & '_neupd: Associated Ritz estimates.') end if -c +c c %-------------------------------------------------% c | Eigenvector Purification step. Formally perform | c | one of inverse subspace iteration. Only used | @@ -1024,19 +1029,22 @@ subroutine dneupd (rvec , howmny, select, dr , di, c iconj = 0 do 110 j=1, nconv - if (workl(iheigi+j-1) .eq. zero) then + if ((workl(iheigi+j-1) .eq. zero) .and. + & (workl(iheigr+j-1) .ne. zero)) then workev(j) = workl(invsub+(j-1)*ldq+ncv-1) / & workl(iheigr+j-1) else if (iconj .eq. 0) then temp = dlapy2 ( workl(iheigr+j-1), workl(iheigi+j-1) ) - workev(j) = ( workl(invsub+(j-1)*ldq+ncv-1) * - & workl(iheigr+j-1) + - & workl(invsub+j*ldq+ncv-1) * - & workl(iheigi+j-1) ) / temp / temp - workev(j+1) = ( workl(invsub+j*ldq+ncv-1) * - & workl(iheigr+j-1) - - & workl(invsub+(j-1)*ldq+ncv-1) * - & workl(iheigi+j-1) ) / temp / temp + if (temp .ne. zero) then + workev(j) = ( workl(invsub+(j-1)*ldq+ncv-1) * + & workl(iheigr+j-1) + + & workl(invsub+j*ldq+ncv-1) * + & workl(iheigi+j-1) ) / temp / temp + workev(j+1) = ( workl(invsub+j*ldq+ncv-1) * + & workl(iheigr+j-1) - + & workl(invsub+(j-1)*ldq+ncv-1) * + & workl(iheigi+j-1) ) / temp / temp + end if iconj = 1 else iconj = 0 @@ -1055,7 +1063,7 @@ subroutine dneupd (rvec , howmny, select, dr , di, 9000 continue c return -c +c c %---------------% c | End of DNEUPD | c %---------------% diff --git a/Toolbox/arpack-src/dngets.f b/Toolbox/arpack-src/dngets.f index d109833c8..47d3ac2ce 100644 --- a/Toolbox/arpack-src/dngets.f +++ b/Toolbox/arpack-src/dngets.f @@ -3,9 +3,9 @@ c c\Name: dngets c -c\Description: +c\Description: c Given the eigenvalues of the upper Hessenberg matrix H, -c computes the NP shifts AMU that are zeros of the polynomial of +c computes the NP shifts AMU that are zeros of the polynomial of c degree NP which filters out components of the unwanted eigenvectors c corresponding to the AMU's based on some given criteria. c @@ -42,12 +42,12 @@ c pairs together. c c RITZR, Double precision array of length KEV+NP. (INPUT/OUTPUT) -c RITZI On INPUT, RITZR and RITZI contain the real and imaginary +c RITZI On INPUT, RITZR and RITZI contain the real and imaginary c parts of the eigenvalues of H. c On OUTPUT, RITZR and RITZI are sorted so that the unwanted c eigenvalues are in the first NP locations and the wanted -c portion is in the last KEV locations. When exact shifts are -c selected, the unwanted part corresponds to the shifts to +c portion is in the last KEV locations. When exact shifts are +c selected, the unwanted part corresponds to the shifts to c be applied. Also, if ISHIFT .eq. 1, the unwanted eigenvalues c are further sorted so that the ones with largest Ritz values c are first. @@ -56,7 +56,7 @@ c Error bounds corresponding to the ordering in RITZ. c c SHIFTR, SHIFTI *** USE deprecated as of version 2.1. *** -c +c c c\EndDoc c @@ -76,13 +76,13 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: ngets.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\Remarks @@ -99,8 +99,8 @@ subroutine dngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -114,7 +114,7 @@ subroutine dngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c %-----------------% c Double precision - & bounds(kev+np), ritzr(kev+np), ritzi(kev+np), + & bounds(kev+np), ritzr(kev+np), ritzi(kev+np), & shiftr(1), shifti(1) c c %------------% @@ -135,7 +135,7 @@ subroutine dngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | External Subroutines | c %----------------------% c - external dcopy, dsortc, second + external dcopy, dsortc, arscnd c c %----------------------% c | Intrinsics Functions | @@ -151,10 +151,10 @@ subroutine dngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% -c - call second (t0) +c + call arscnd (t0) msglvl = mngets -c +c c %----------------------------------------------------% c | LM, SM, LR, SR, LI, SI case. | c | Sort the eigenvalues of H into the desired order | @@ -178,16 +178,16 @@ subroutine dngets ( ishift, which, kev, np, ritzr, ritzi, bounds, else if (which .eq. 'SI') then call dsortc ('SM', .true., kev+np, ritzr, ritzi, bounds) end if -c +c call dsortc (which, .true., kev+np, ritzr, ritzi, bounds) -c +c c %-------------------------------------------------------% c | Increase KEV by one if the ( ritzr(np),ritzi(np) ) | c | = ( ritzr(np+1),-ritzi(np+1) ) and ritz(np) .ne. zero | c | Accordingly decrease NP by one. In other words keep | c | complex conjugate pairs together. | c %-------------------------------------------------------% -c +c if ( ( ritzr(np+1) - ritzr(np) ) .eq. zero & .and. ( ritzi(np+1) + ritzi(np) ) .eq. zero ) then np = np - 1 @@ -195,7 +195,7 @@ subroutine dngets ( ishift, which, kev, np, ritzr, ritzi, bounds, end if c if ( ishift .eq. 1 ) then -c +c c %-------------------------------------------------------% c | Sort the unwanted Ritz values used as shifts so that | c | the ones with largest Ritz estimates are first | @@ -204,28 +204,28 @@ subroutine dngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | are applied in subroutine dnapps. | c | Be careful and use 'SR' since we want to sort BOUNDS! | c %-------------------------------------------------------% -c +c call dsortc ( 'SR', .true., np, bounds, ritzr, ritzi ) end if -c - call second (t1) +c + call arscnd (t1) tngets = tngets + (t1 - t0) c if (msglvl .gt. 0) then - call ivout (logfil, 1, kev, ndigit, '_ngets: KEV is') - call ivout (logfil, 1, np, ndigit, '_ngets: NP is') + call ivout (logfil, 1, [kev], ndigit, '_ngets: KEV is') + call ivout (logfil, 1, [np], ndigit, '_ngets: NP is') call dvout (logfil, kev+np, ritzr, ndigit, & '_ngets: Eigenvalues of current H matrix -- real part') call dvout (logfil, kev+np, ritzi, ndigit, & '_ngets: Eigenvalues of current H matrix -- imag part') - call dvout (logfil, kev+np, bounds, ndigit, + call dvout (logfil, kev+np, bounds, ndigit, & '_ngets: Ritz estimates of the current KEV+NP Ritz values') end if -c +c return -c +c c %---------------% c | End of dngets | c %---------------% -c +c end diff --git a/Toolbox/arpack-src/dsaitr.f b/Toolbox/arpack-src/dsaitr.f index 2d3026485..3460d990c 100644 --- a/Toolbox/arpack-src/dsaitr.f +++ b/Toolbox/arpack-src/dsaitr.f @@ -3,8 +3,8 @@ c c\Name: dsaitr c -c\Description: -c Reverse communication interface for applying NP additional steps to +c\Description: +c Reverse communication interface for applying NP additional steps to c a K step symmetric Arnoldi factorization. c c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T @@ -20,7 +20,7 @@ c c\Usage: c call dsaitr -c ( IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH, +c ( IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH, c IPNTR, WORKD, INFO ) c c\Arguments @@ -76,13 +76,13 @@ c On OUTPUT the B-norm of the updated residual r_{k+p}. c c V Double precision N by K+NP array. (INPUT/OUTPUT) -c On INPUT: V contains the Arnoldi vectors in the first K +c On INPUT: V contains the Arnoldi vectors in the first K c columns. c On OUTPUT: V contains the new NP Arnoldi vectors in the next c NP columns. The first K columns are unchanged. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Double precision (K+NP) by 2 array. (INPUT/OUTPUT) @@ -91,26 +91,26 @@ c and the main diagonal in the second column. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORK for +c Pointer to mark the starting locations in the WORK for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The calling program should not +c for reverse communication. The calling program should not c use WORKD as temporary workspace during the iteration !!!!!! c On INPUT, WORKD(1:N) = B*RESID where RESID is associated -c with the K step Arnoldi factorization. Used to save some -c computation at the first step. +c with the K step Arnoldi factorization. Used to save some +c computation at the first step. c On OUTPUT, WORKD(1:N) = B*RESID where RESID is associated c with the K+NP step Arnoldi factorization. c @@ -139,7 +139,7 @@ c daxpy Level 1 BLAS that computes a vector triad. c dscal Level 1 BLAS that scales a vector. c dcopy Level 1 BLAS that copies one vector to another . -c ddot Level 1 BLAS that computes the scalar product of two vectors. +c ddot Level 1 BLAS that computes the scalar product of two vectors. c dnrm2 Level 1 BLAS that computes the norm of a vector. c c\Author @@ -147,29 +147,29 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c xx/xx/93: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: saitr.F SID: 2.6 DATE OF SID: 8/28/96 RELEASE: 2 c c\Remarks c The algorithm implemented is: -c +c c restart = .false. -c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; +c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; c r_{k} contains the initial residual vector even for k = 0; -c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already +c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already c computed by the calling program. c c betaj = rnorm ; p_{k+1} = B*r_{k} ; c For j = k+1, ..., k+np Do c 1) if ( betaj < tol ) stop or restart depending on j. c if ( restart ) generate a new starting vector. -c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; +c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; c p_{j} = p_{j}/betaj c 3) r_{j} = OP*v_{j} where OP is defined as in dsaupd c For shift-invert mode p_{j} = B*v_{j} is already available. @@ -184,7 +184,7 @@ c 5) Re-orthogonalization step: c s = V_{j}'*B*r_{j} c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} || -c alphaj = alphaj + s_{j}; +c alphaj = alphaj + s_{j}; c 6) Iterative refinement step: c If (rnorm1 > 0.717*rnorm) then c rnorm = rnorm1 @@ -194,7 +194,7 @@ c If this is the first time in step 6), go to 5) c Else r_{j} lies in the span of V_{j} numerically. c Set r_{j} = 0 and rnorm = 0; go to 1) -c EndIf +c EndIf c End Do c c\EndLib @@ -202,15 +202,15 @@ c----------------------------------------------------------------------- c subroutine dsaitr - & (ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh, + & (ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh, & ipntr, workd, info) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -242,7 +242,7 @@ subroutine dsaitr c %---------------% c logical first, orth1, orth2, rstart, step3, step4 - integer i, ierr, ipj, irj, ivj, iter, itry, j, msglvl, + integer i, ierr, ipj, irj, ivj, iter, itry, j, msglvl, & infol, jj Double precision & rnorm1, wnorm, safmin, temp1 @@ -251,7 +251,7 @@ subroutine dsaitr & rnorm1, safmin, wnorm c c %-----------------------% -c | Local Array Arguments | +c | Local Array Arguments | c %-----------------------% c Double precision @@ -262,7 +262,7 @@ subroutine dsaitr c %----------------------% c external daxpy, dcopy, dscal, dgemv, dgetv0, dvout, dmout, - & dlascl, ivout, second + & dlascl, ivout, arscnd c c %--------------------% c | External Functions | @@ -294,15 +294,15 @@ subroutine dsaitr end if c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msaitr -c +c c %------------------------------% c | Initial call to this routine | c %------------------------------% @@ -313,14 +313,14 @@ subroutine dsaitr rstart = .false. orth1 = .false. orth2 = .false. -c +c c %--------------------------------% c | Pointer to the current step of | c | the factorization to build | c %--------------------------------% c j = k + 1 -c +c c %------------------------------------------% c | Pointers used for reverse communication | c | when using WORKD. | @@ -330,7 +330,7 @@ subroutine dsaitr irj = ipj + n ivj = irj + n end if -c +c c %-------------------------------------------------% c | When in reverse communication mode one of: | c | STEP3, STEP4, ORTH1, ORTH2, RSTART | @@ -353,7 +353,7 @@ subroutine dsaitr c %------------------------------% c | Else this is the first step. | c %------------------------------% -c +c c %--------------------------------------------------------------% c | | c | A R N O L D I I T E R A T I O N L O O P | @@ -364,15 +364,15 @@ subroutine dsaitr 1000 continue c if (msglvl .gt. 2) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_saitr: generating Arnoldi vector no.') - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_saitr: B-norm of the current residual =') end if -c +c c %---------------------------------------------------------% -c | Check for exact zero. Equivalent to determing whether a | -c | j-step Arnoldi factorization is present. | +c | Check for exact zero. Equivalent to determining whether | +c | a j-step Arnoldi factorization is present. | c %---------------------------------------------------------% c if (rnorm .gt. zero) go to 40 @@ -384,10 +384,10 @@ subroutine dsaitr c %---------------------------------------------------% c if (msglvl .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_saitr: ****** restart at step ******') end if -c +c c %---------------------------------------------% c | ITRY is the loop variable that controls the | c | maximum amount of times that a restart is | @@ -406,7 +406,7 @@ subroutine dsaitr c | RSTART = .true. flow returns here. | c %--------------------------------------% c - call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv, + call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv, & resid, rnorm, ipntr, workd, ierr) if (ido .ne. 99) go to 9000 if (ierr .lt. 0) then @@ -420,12 +420,12 @@ subroutine dsaitr c %------------------------------------------------% c info = j - 1 - call second (t1) + call arscnd (t1) tsaitr = tsaitr + (t1 - t0) ido = 99 go to 9000 end if -c +c 40 continue c c %---------------------------------------------------------% @@ -447,12 +447,12 @@ subroutine dsaitr c | use LAPACK routine SLASCL | c %-----------------------------------------% c - call dlascl ('General', i, i, rnorm, one, n, 1, + call dlascl ('General', i, i, rnorm, one, n, 1, & v(1,j), n, infol) - call dlascl ('General', i, i, rnorm, one, n, 1, + call dlascl ('General', i, i, rnorm, one, n, 1, & workd(ipj), n, infol) end if -c +c c %------------------------------------------------------% c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | c | Note that this is not quite yet r_{j}. See STEP 4 | @@ -460,28 +460,28 @@ subroutine dsaitr c step3 = .true. nopx = nopx + 1 - call second (t2) + call arscnd (t2) call dcopy (n, v(1,j), 1, workd(ivj), 1) ipntr(1) = ivj ipntr(2) = irj ipntr(3) = ipj ido = 1 -c +c c %-----------------------------------% c | Exit in order to compute OP*v_{j} | c %-----------------------------------% -c +c go to 9000 50 continue -c +c c %-----------------------------------% c | Back from reverse communication; | c | WORKD(IRJ:IRJ+N-1) := OP*v_{j}. | c %-----------------------------------% c - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) -c +c step3 = .false. c c %------------------------------------------% @@ -489,7 +489,7 @@ subroutine dsaitr c %------------------------------------------% c call dcopy (n, workd(irj), 1, resid, 1) -c +c c %-------------------------------------------% c | STEP 4: Finish extending the symmetric | c | Arnoldi to length j. If MODE = 2 | @@ -500,33 +500,33 @@ subroutine dsaitr c %-------------------------------------------% c if (mode .eq. 2) go to 65 - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 step4 = .true. ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-------------------------------------% c | Exit in order to compute B*OP*v_{j} | c %-------------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call dcopy(n, resid, 1 , workd(ipj), 1) end if 60 continue -c +c c %-----------------------------------% c | Back from reverse communication; | c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j}. | c %-----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) - end if + end if c step4 = .false. c @@ -545,7 +545,7 @@ subroutine dsaitr c wnorm = ddot (n, resid, 1, workd(ivj), 1) wnorm = sqrt(abs(wnorm)) - else if (bmat .eq. 'G') then + else if (bmat .eq. 'G') then wnorm = ddot (n, resid, 1, workd(ipj), 1) wnorm = sqrt(abs(wnorm)) else if (bmat .eq. 'I') then @@ -567,19 +567,19 @@ subroutine dsaitr c %------------------------------------------% c if (mode .ne. 2 ) then - call dgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero, + call dgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero, & workd(irj), 1) else if (mode .eq. 2) then - call dgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero, + call dgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero, & workd(irj), 1) end if c c %--------------------------------------% c | Orthgonalize r_{j} against V_{j}. | -c | RESID contains OP*v_{j}. See STEP 3. | +c | RESID contains OP*v_{j}. See STEP 3. | c %--------------------------------------% c - call dgemv('N', n, j, -one, v, ldv, workd(irj), 1, one, + call dgemv('N', n, j, -one, v, ldv, workd(irj), 1, one, & resid, 1) c c %--------------------------------------% @@ -592,46 +592,46 @@ subroutine dsaitr else h(j,1) = rnorm end if - call second (t4) -c + call arscnd (t4) +c orth1 = .true. iter = 0 -c - call second (t2) +c + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call dcopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*r_{j} | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd(ipj), 1) end if 70 continue -c +c c %---------------------------------------------------% c | Back from reverse communication if ORTH1 = .true. | c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c orth1 = .false. c c %------------------------------% c | Compute the B-norm of r_{j}. | c %------------------------------% c - if (bmat .eq. 'G') then + if (bmat .eq. 'G') then rnorm = ddot (n, resid, 1, workd(ipj), 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then @@ -655,7 +655,7 @@ subroutine dsaitr c if (rnorm .gt. 0.717*wnorm) go to 100 nrorth = nrorth + 1 -c +c c %---------------------------------------------------% c | Enter the Iterative refinement phase. If further | c | refinement is necessary, loop back here. The loop | @@ -668,7 +668,7 @@ subroutine dsaitr if (msglvl .gt. 2) then xtemp(1) = wnorm xtemp(2) = rnorm - call dvout (logfil, 2, xtemp, ndigit, + call dvout (logfil, 2, xtemp, ndigit, & '_saitr: re-orthonalization ; wnorm and rnorm are') end if c @@ -677,7 +677,7 @@ subroutine dsaitr c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | c %----------------------------------------------------% c - call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1, + call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1, & zero, workd(irj), 1) c c %----------------------------------------------% @@ -688,26 +688,26 @@ subroutine dsaitr c | H(j,j) is updated. | c %----------------------------------------------% c - call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1, + call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1, & one, resid, 1) c if (j .eq. 1 .or. rstart) h(j,1) = zero h(j,2) = h(j,2) + workd(irj + j - 1) -c +c orth2 = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call dcopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-----------------------------------% c | Exit in order to compute B*r_{j}. | c | r_{j} is the corrected residual. | c %-----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd(ipj), 1) @@ -719,15 +719,15 @@ subroutine dsaitr c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if c c %-----------------------------------------------------% c | Compute the B-norm of the corrected residual r_{j}. | c %-----------------------------------------------------% -c - if (bmat .eq. 'G') then +c + if (bmat .eq. 'G') then rnorm1 = ddot (n, resid, 1, workd(ipj), 1) rnorm1 = sqrt(abs(rnorm1)) else if (bmat .eq. 'I') then @@ -735,7 +735,7 @@ subroutine dsaitr end if c if (msglvl .gt. 0 .and. iter .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_saitr: Iterative refinement for Arnoldi residual') if (msglvl .gt. 2) then xtemp(1) = rnorm @@ -744,7 +744,7 @@ subroutine dsaitr & '_saitr: iterative refinement ; rnorm and rnorm1 are') end if end if -c +c c %-----------------------------------------% c | Determine if we need to perform another | c | step of re-orthogonalization. | @@ -757,7 +757,7 @@ subroutine dsaitr c %--------------------------------% c rnorm = rnorm1 -c +c else c c %-------------------------------------------% @@ -779,7 +779,7 @@ subroutine dsaitr 95 continue rnorm = zero end if -c +c c %----------------------------------------------% c | Branch here directly if iterative refinement | c | wasn't necessary or after at most NITER_REF | @@ -787,13 +787,13 @@ subroutine dsaitr c %----------------------------------------------% c 100 continue -c +c rstart = .false. orth2 = .false. -c - call second (t5) +c + call arscnd (t5) titref = titref + (t5 - t4) -c +c c %----------------------------------------------------------% c | Make sure the last off-diagonal element is non negative | c | If not perform a similarity transformation on H(1:j,1:j) | @@ -802,28 +802,28 @@ subroutine dsaitr c if (h(j,1) .lt. zero) then h(j,1) = -h(j,1) - if ( j .lt. k+np) then + if ( j .lt. k+np) then call dscal(n, -one, v(1,j+1), 1) else call dscal(n, -one, resid, 1) end if end if -c +c c %------------------------------------% c | STEP 6: Update j = j+1; Continue | c %------------------------------------% c j = j + 1 if (j .gt. k+np) then - call second (t1) + call arscnd (t1) tsaitr = tsaitr + (t1 - t0) ido = 99 c if (msglvl .gt. 1) then - call dvout (logfil, k+np, h(1,2), ndigit, + call dvout (logfil, k+np, h(1,2), ndigit, & '_saitr: main diagonal of matrix H of step K+NP.') if (k+np .gt. 1) then - call dvout (logfil, k+np-1, h(2,1), ndigit, + call dvout (logfil, k+np-1, h(2,1), ndigit, & '_saitr: sub diagonal of matrix H of step K+NP.') end if end if @@ -836,7 +836,7 @@ subroutine dsaitr c %--------------------------------------------------------% c go to 1000 -c +c c %---------------------------------------------------------------% c | | c | E N D O F M A I N I T E R A T I O N L O O P | diff --git a/Toolbox/arpack-src/dsapps.f b/Toolbox/arpack-src/dsapps.f index 4089d2f8d..f84ef8389 100644 --- a/Toolbox/arpack-src/dsapps.f +++ b/Toolbox/arpack-src/dsapps.f @@ -12,8 +12,8 @@ c c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q c -c where Q is an orthogonal matrix of order KEV+NP. Q is the product of -c rotations resulting from the NP bulge chasing sweeps. The updated Arnoldi +c where Q is an orthogonal matrix of order KEV+NP. Q is the product of +c rotations resulting from the NP bulge chasing sweeps. The updated Arnoldi c factorization becomes: c c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T. @@ -49,7 +49,7 @@ c INPUT: H contains the symmetric tridiagonal matrix of the c Arnoldi factorization with the subdiagonal in the 1st column c starting at H(2,1) and the main diagonal in the 2nd column. -c OUTPUT: H contains the updated tridiagonal matrix in the +c OUTPUT: H contains the updated tridiagonal matrix in the c KEV leading submatrix. c c LDH Integer. (INPUT) @@ -85,13 +85,13 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c ivout ARPACK utility routine that prints integers. +c arscnd ARPACK utility routine for timing. c dvout ARPACK utility routine that prints vectors. c dlamch LAPACK routine that determines machine constants. c dlartg LAPACK Givens rotation construction routine. @@ -107,19 +107,19 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c 12/16/93: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sapps.F SID: 2.6 DATE OF SID: 3/28/97 RELEASE: 2 c c\Remarks c 1. In this version, each shift is applied to all the subblocks of -c the tridiagonal matrix H and not just to the submatrix that it -c comes from. This routine assumes that the subdiagonal elements +c the tridiagonal matrix H and not just to the submatrix that it +c comes from. This routine assumes that the subdiagonal elements c of H that are stored in h(1:kev+np,1) are nonegative upon input c and enforce this condition upon output. This version incorporates c deflation. See code for documentation. @@ -135,8 +135,8 @@ subroutine dsapps c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -149,7 +149,7 @@ subroutine dsapps c %-----------------% c Double precision - & h(ldh,2), q(ldq,kev+np), resid(n), shift(np), + & h(ldh,2), q(ldq,kev+np), resid(n), shift(np), & v(ldv,kev+np), workd(2*n) c c %------------% @@ -175,8 +175,8 @@ subroutine dsapps c | External Subroutines | c %----------------------% c - external daxpy, dcopy, dscal, dlacpy, dlartg, dlaset, dvout, - & ivout, second, dgemv + external daxpy, dcopy, dscal, dlacpy, dlartg, dlaset, dvout, + & ivout, arscnd, dgemv c c %--------------------% c | External Functions | @@ -193,7 +193,7 @@ subroutine dsapps intrinsic abs c c %----------------% -c | Data statments | +c | Data statements | c %----------------% c data first / .true. / @@ -213,11 +213,11 @@ subroutine dsapps c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msapps -c - kplusp = kev + np -c +c + kplusp = kev + np +c c %----------------------------------------------% c | Initialize Q to the identity matrix of order | c | kplusp used to accumulate the rotations. | @@ -230,7 +230,7 @@ subroutine dsapps c %----------------------------------------------% c if (np .eq. 0) go to 9000 -c +c c %----------------------------------------------------------% c | Apply the np shifts implicitly. Apply each shift to the | c | whole matrix and not just to the submatrix from which it | @@ -238,7 +238,7 @@ subroutine dsapps c %----------------------------------------------------------% c do 90 jj = 1, np -c +c istart = itop c c %----------------------------------------------------------% @@ -261,11 +261,11 @@ subroutine dsapps big = abs(h(i,2)) + abs(h(i+1,2)) if (h(i+1,1) .le. epsmch*big) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_sapps: deflation at row/column no.') - call ivout (logfil, 1, jj, ndigit, - & '_sapps: occured before shift number.') - call dvout (logfil, 1, h(i+1,1), ndigit, + call ivout (logfil, 1, [jj], ndigit, + & '_sapps: occurred before shift number.') + call dvout (logfil, 1, h(i+1,1), ndigit, & '_sapps: the corresponding off diagonal element') end if h(i+1,1) = zero @@ -277,7 +277,7 @@ subroutine dsapps 40 continue c if (istart .lt. iend) then -c +c c %--------------------------------------------------------% c | Construct the plane rotation G'(istart,istart+1,theta) | c | that attempts to drive h(istart+1,1) to zero. | @@ -286,7 +286,7 @@ subroutine dsapps f = h(istart,2) - shift(jj) g = h(istart+1,1) call dlartg (f, g, c, s, r) -c +c c %-------------------------------------------------------% c | Apply rotation to the left and right of H; | c | H <- G' * H * G, where G = G(istart,istart+1,theta). | @@ -296,11 +296,11 @@ subroutine dsapps a1 = c*h(istart,2) + s*h(istart+1,1) a2 = c*h(istart+1,1) + s*h(istart+1,2) a4 = c*h(istart+1,2) - s*h(istart+1,1) - a3 = c*h(istart+1,1) - s*h(istart,2) + a3 = c*h(istart+1,1) - s*h(istart,2) h(istart,2) = c*a1 + s*a2 h(istart+1,2) = c*a4 - s*a3 h(istart+1,1) = c*a3 + s*a4 -c +c c %----------------------------------------------------% c | Accumulate the rotation in the matrix Q; Q <- Q*G | c %----------------------------------------------------% @@ -323,7 +323,7 @@ subroutine dsapps c %----------------------------------------------% c do 70 i = istart+1, iend-1 -c +c c %----------------------------------------------% c | Construct the plane rotation G'(i,i+1,theta) | c | that zeros the i-th bulge that was created | @@ -351,23 +351,23 @@ subroutine dsapps c = -c s = -s end if -c +c c %--------------------------------------------% c | Apply rotation to the left and right of H; | c | H <- G * H * G', where G = G(i,i+1,theta) | c %--------------------------------------------% c h(i,1) = r -c +c a1 = c*h(i,2) + s*h(i+1,1) a2 = c*h(i+1,1) + s*h(i+1,2) a3 = c*h(i+1,1) - s*h(i,2) a4 = c*h(i+1,2) - s*h(i+1,1) -c +c h(i,2) = c*a1 + s*a2 h(i+1,2) = c*a4 - s*a3 h(i+1,1) = c*a3 + s*a4 -c +c c %----------------------------------------------------% c | Accumulate the rotation in the matrix Q; Q <- Q*G | c %----------------------------------------------------% @@ -425,16 +425,16 @@ subroutine dsapps c %------------------------------------------% c | All shifts have been applied. Check for | c | more possible deflation that might occur | -c | after the last shift is applied. | +c | after the last shift is applied. | c %------------------------------------------% c do 100 i = itop, kplusp-1 big = abs(h(i,2)) + abs(h(i+1,2)) if (h(i+1,1) .le. epsmch*big) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_sapps: deflation at row/column no.') - call dvout (logfil, 1, h(i+1,1), ndigit, + call dvout (logfil, 1, h(i+1,1), ndigit, & '_sapps: the corresponding off diagonal element') end if h(i+1,1) = zero @@ -447,13 +447,13 @@ subroutine dsapps c | This is not necessary if h(kev+1,1) = 0. | c %-------------------------------------------------% c - if ( h(kev+1,1) .gt. zero ) + if ( h(kev+1,1) .gt. zero ) & call dgemv ('N', n, kplusp, one, v, ldv, & q(1,kev+1), 1, zero, workd(n+1), 1) -c +c c %-------------------------------------------------------% c | Compute column 1 to kev of (V*Q) in backward order | -c | taking advantage that Q is an upper triangular matrix | +c | taking advantage that Q is an upper triangular matrix | c | with lower bandwidth np. | c | Place results in v(:,kplusp-kev:kplusp) temporarily. | c %-------------------------------------------------------% @@ -468,16 +468,18 @@ subroutine dsapps c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). | c %-------------------------------------------------% c - call dlacpy ('All', n, kev, v(1,np+1), ldv, v, ldv) -c + do 140 i = 1, kev + call dcopy (n, v(1,np+i), 1, v(1,i), 1) + 140 continue +c c %--------------------------------------------% c | Copy the (kev+1)-st column of (V*Q) in the | c | appropriate place if h(kev+1,1) .ne. zero. | c %--------------------------------------------% c - if ( h(kev+1,1) .gt. zero ) + if ( h(kev+1,1) .gt. zero ) & call dcopy (n, workd(n+1), 1, v(1,kev+1), 1) -c +c c %-------------------------------------% c | Update the residual vector: | c | r <- sigmak*r + betak*v(:,kev+1) | @@ -487,26 +489,26 @@ subroutine dsapps c %-------------------------------------% c call dscal (n, q(kplusp,kev), resid, 1) - if (h(kev+1,1) .gt. zero) + if (h(kev+1,1) .gt. zero) & call daxpy (n, h(kev+1,1), v(1,kev+1), 1, resid, 1) c if (msglvl .gt. 1) then - call dvout (logfil, 1, q(kplusp,kev), ndigit, + call dvout (logfil, 1, q(kplusp,kev), ndigit, & '_sapps: sigmak of the updated residual vector') - call dvout (logfil, 1, h(kev+1,1), ndigit, + call dvout (logfil, 1, h(kev+1,1), ndigit, & '_sapps: betak of the updated residual vector') - call dvout (logfil, kev, h(1,2), ndigit, + call dvout (logfil, kev, h(1,2), ndigit, & '_sapps: updated main diagonal of H for next iteration') if (kev .gt. 1) then - call dvout (logfil, kev-1, h(2,1), ndigit, + call dvout (logfil, kev-1, h(2,1), ndigit, & '_sapps: updated sub diagonal of H for next iteration') end if end if c - call second (t1) + call arscnd (t1) tsapps = tsapps + (t1 - t0) -c - 9000 continue +c + 9000 continue return c c %---------------% diff --git a/Toolbox/arpack-src/dsaup2.f b/Toolbox/arpack-src/dsaup2.f index b61ff5b64..fd4143f53 100644 --- a/Toolbox/arpack-src/dsaup2.f +++ b/Toolbox/arpack-src/dsaup2.f @@ -3,35 +3,35 @@ c c\Name: dsaup2 c -c\Description: +c\Description: c Intermediate level interface called by dsaupd. c c\Usage: -c call dsaup2 +c call dsaup2 c ( IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD, -c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, Q, LDQ, WORKL, +c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, Q, LDQ, WORKL, c IPNTR, WORKD, INFO ) c c\Arguments c c IDO, BMAT, N, WHICH, NEV, TOL, RESID: same as defined in dsaupd. c MODE, ISHIFT, MXITER: see the definition of IPARAM in dsaupd. -c +c c NP Integer. (INPUT/OUTPUT) -c Contains the number of implicit shifts to apply during -c each Arnoldi/Lanczos iteration. -c If ISHIFT=1, NP is adjusted dynamically at each iteration +c Contains the number of implicit shifts to apply during +c each Arnoldi/Lanczos iteration. +c If ISHIFT=1, NP is adjusted dynamically at each iteration c to accelerate convergence and prevent stagnation. -c This is also roughly equal to the number of matrix-vector +c This is also roughly equal to the number of matrix-vector c products (involving the operator OP) per Arnoldi iteration. c The logic for adjusting is contained within the current c subroutine. c If ISHIFT=0, NP is the number of shifts the user needs -c to provide via reverse comunication. 0 < NP < NCV-NEV. +c to provide via reverse communication. 0 < NP < NCV-NEV. c NP may be less than NCV-NEV since a leading block of the current c upper Tridiagonal matrix has split off and contains "unwanted" c Ritz values. -c Upon termination of the IRA iteration, NP contains the number +c Upon termination of the IRA iteration, NP contains the number c of "converged" wanted Ritz values. c c IUPD Integer. (INPUT) @@ -42,18 +42,18 @@ c The Lanczos basis vectors. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Double precision (NEV+NP) by 2 array. (OUTPUT) c H is used to store the generated symmetric tridiagonal matrix -c The subdiagonal is stored in the first column of H starting -c at H(2,1). The main diagonal is stored in the second column -c of H starting at H(1,2). If dsaup2 converges store the +c The subdiagonal is stored in the first column of H starting +c at H(2,1). The main diagonal is stored in the arscnd column +c of H starting at H(1,2). If dsaup2 converges store the c B-norm of the final residual vector in H(1,1). c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c RITZ Double precision array of length NEV+NP. (OUTPUT) @@ -63,33 +63,33 @@ c BOUNDS(1:NEV) contain the error bounds corresponding to RITZ. c c Q Double precision (NEV+NP) by (NEV+NP) array. (WORKSPACE) -c Private (replicated) work array used to accumulate the +c Private (replicated) work array used to accumulate the c rotation in the shift application step. c c LDQ Integer. (INPUT) c Leading dimension of Q exactly as declared in the calling c program. -c +c c WORKL Double precision array of length at least 3*(NEV+NP). (INPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on -c the front end. It is used in the computation of the +c the front end. It is used in the computation of the c tridiagonal eigenvalue problem, the calculation and c application of the shifts and convergence checking. c If ISHIFT .EQ. O and IDO .EQ. 3, the first NP locations -c of WORKL are used in reverse communication to hold the user +c of WORKL are used in reverse communication to hold the user c supplied shifts. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORKD for +c Pointer to mark the starting locations in the WORKD for c vectors used by the Lanczos iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in one of +c IPNTR(3): pointer to the vector B * X when used in one of c the spectral transformation modes. X is the current c operand. c ------------------------------------------------------------- -c +c c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Lanczos iteration c for reverse communication. The user should not use WORKD @@ -102,9 +102,9 @@ c possibly from a previous run. c Error flag on output. c = 0: Normal return. -c = 1: All possible eigenvalues of OP has been found. +c = 1: All possible eigenvalues of OP has been found. c NP returns the size of the invariant subspace -c spanning the operator OP. +c spanning the operator OP. c = 2: No shifts could be applied. c = -8: Error return from trid. eigenvalue calculation; c This should never happen. @@ -122,7 +122,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall, @@ -132,15 +132,15 @@ c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to c Implement the Spectral Transformation", Math. Comp., 48 (1987), c pp 663-673. -c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos -c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", +c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos +c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", c SIAM J. Matr. Anal. Apps., January (1993). c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines c for Updating the QR decomposition", ACM TOMS, December 1990, c Volume 16 Number 4, pp 369-377. c c\Routines called: -c dgetv0 ARPACK initial vector generation routine. +c dgetv0 ARPACK initial vector generation routine. c dsaitr ARPACK Lanczos factorization routine. c dsapps ARPACK application of implicit shifts routine. c dsconv ARPACK convergence of Ritz values routine. @@ -148,11 +148,11 @@ c dsgets ARPACK reorder Ritz values and error bounds routine. c dsortr ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dvout ARPACK utility routine that prints vectors. c dlamch LAPACK routine that determines machine constants. c dcopy Level 1 BLAS that copies one vector to another. -c ddot Level 1 BLAS that computes the scalar product of two vectors. +c ddot Level 1 BLAS that computes the scalar product of two vectors. c dnrm2 Level 1 BLAS that computes the norm of a vector. c dscal Level 1 BLAS that scales a vector. c dswap Level 1 BLAS that swaps two vectors. @@ -162,14 +162,14 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c 12/15/93: Version ' 2.4' c xx/xx/95: Version ' 2.4'. (R.B. Lehoucq) c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: saup2.F SID: 2.7 DATE OF SID: 5/19/98 RELEASE: 2 c c\EndLib @@ -177,16 +177,16 @@ c----------------------------------------------------------------------- c subroutine dsaup2 - & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, + & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, + & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, & q, ldq, workl, ipntr, workd, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -204,8 +204,8 @@ subroutine dsaup2 c integer ipntr(3) Double precision - & bounds(nev+np), h(ldh,2), q(ldq,nev+np), resid(n), - & ritz(nev+np), v(ldv,nev+np), workd(3*n), + & bounds(nev+np), h(ldh,2), q(ldq,nev+np), resid(n), + & ritz(nev+np), v(ldv,nev+np), workd(3*n), & workl(3*(nev+np)) c c %------------% @@ -222,8 +222,8 @@ subroutine dsaup2 c character wprime*2 logical cnorm, getv0, initv, update, ushift - integer ierr, iter, j, kplusp, msglvl, nconv, nevbef, nev0, - & np0, nptemp, nevd2, nevm2, kp(3) + integer ierr, iter, j, kplusp, msglvl, nconv, nevbef, nev0, + & np0, nptemp, nevd2, nevm2, kp(3) Double precision & rnorm, temp, eps23 save cnorm, getv0, initv, update, ushift, @@ -234,8 +234,8 @@ subroutine dsaup2 c | External Subroutines | c %----------------------% c - external dcopy, dgetv0, dsaitr, dscal, dsconv, dseigt, dsgets, - & dsapps, dsortr, dvout, ivout, second, dswap + external dcopy, dgetv0, dsaitr, dscal, dsconv, dseigt, dsgets, + & dsapps, dsortr, dvout, ivout, arscnd, dswap c c %--------------------% c | External Functions | @@ -256,13 +256,13 @@ subroutine dsaup2 c %-----------------------% c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msaup2 c c %---------------------------------% @@ -292,7 +292,7 @@ subroutine dsaup2 kplusp = nev0 + np0 nconv = 0 iter = 0 -c +c c %--------------------------------------------% c | Set flags for computing the first NEV steps | c | of the Lanczos factorization. | @@ -315,7 +315,7 @@ subroutine dsaup2 initv = .false. end if end if -c +c c %---------------------------------------------% c | Get a possibly random starting vector and | c | force it into the range of the operator OP. | @@ -332,7 +332,7 @@ subroutine dsaup2 if (rnorm .eq. zero) then c c %-----------------------------------------% -c | The initial vector is zero. Error exit. | +c | The initial vector is zero. Error exit. | c %-----------------------------------------% c info = -9 @@ -341,7 +341,7 @@ subroutine dsaup2 getv0 = .false. ido = 0 end if -c +c c %------------------------------------------------------------% c | Back from reverse communication: continue with update step | c %------------------------------------------------------------% @@ -360,14 +360,14 @@ subroutine dsaup2 c %-------------------------------------% c if (cnorm) go to 100 -c +c c %----------------------------------------------------------% c | Compute the first NEV steps of the Lanczos factorization | c %----------------------------------------------------------% c - call dsaitr (ido, bmat, n, 0, nev0, mode, resid, rnorm, v, ldv, + call dsaitr (ido, bmat, n, 0, nev0, mode, resid, rnorm, v, ldv, & h, ldh, ipntr, workd, info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -388,7 +388,7 @@ subroutine dsaup2 info = -9999 go to 1200 end if -c +c c %--------------------------------------------------------------% c | | c | M A I N LANCZOS I T E R A T I O N L O O P | @@ -396,22 +396,22 @@ subroutine dsaup2 c | factorization in place. | c | | c %--------------------------------------------------------------% -c +c 1000 continue c iter = iter + 1 c if (msglvl .gt. 0) then - call ivout (logfil, 1, iter, ndigit, + call ivout (logfil, 1, [iter], ndigit, & '_saup2: **** Start of major iteration number ****') end if if (msglvl .gt. 1) then - call ivout (logfil, 1, nev, ndigit, + call ivout (logfil, 1, [nev], ndigit, & '_saup2: The length of the current Lanczos factorization') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_saup2: Extend the Lanczos factorization by') end if -c +c c %------------------------------------------------------------% c | Compute NP additional steps of the Lanczos factorization. | c %------------------------------------------------------------% @@ -420,9 +420,9 @@ subroutine dsaup2 20 continue update = .true. c - call dsaitr (ido, bmat, n, nev, np, mode, resid, rnorm, v, + call dsaitr (ido, bmat, n, nev, np, mode, resid, rnorm, v, & ldv, h, ldh, ipntr, workd, info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -434,7 +434,7 @@ subroutine dsaup2 c c %-----------------------------------------------------% c | dsaitr was unable to build an Lanczos factorization | -c | of length NEV0+NP0. INFO is returned with the size | +c | of length NEV0+NP0. INFO is returned with the size | c | of the factorization built. Exit main loop. | c %-----------------------------------------------------% c @@ -446,10 +446,10 @@ subroutine dsaup2 update = .false. c if (msglvl .gt. 1) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_saup2: Current B-norm of residual for factorization') end if -c +c c %--------------------------------------------------------% c | Compute the eigenvalues and corresponding error bounds | c | of the current symmetric tridiagonal matrix. | @@ -483,7 +483,7 @@ subroutine dsaup2 nev = nev0 np = np0 call dsgets (ishift, which, nev, np, ritz, bounds, workl) -c +c c %-------------------% c | Convergence test. | c %-------------------% @@ -520,11 +520,11 @@ subroutine dsaup2 nev = nev + 1 end if 30 continue -c - if ( (nconv .ge. nev0) .or. +c + if ( (nconv .ge. nev0) .or. & (iter .gt. mxiter) .or. & (np .eq. 0) ) then -c +c c %------------------------------------------------% c | Prepare to exit. Put the converged Ritz values | c | and corresponding bounds in RITZ(1:NCONV) and | @@ -547,8 +547,9 @@ subroutine dsaup2 wprime = 'SA' call dsortr (wprime, .true., kplusp, ritz, bounds) nevd2 = nev0 / 2 - nevm2 = nev0 - nevd2 + nevm2 = nev0 - nevd2 if ( nev .gt. 1 ) then + np = kplusp - nev0 call dswap ( min(nevd2,np), ritz(nevm2+1), 1, & ritz( max(kplusp-nevd2+1,kplusp-np+1) ), 1) call dswap ( min(nevd2,np), bounds(nevm2+1), 1, @@ -587,7 +588,7 @@ subroutine dsaup2 c c %----------------------------------------------------% c | Sort the Ritz values according to the scaled Ritz | -c | esitmates. This will push all the converged ones | +c | estimates. This will push all the converged ones | c | towards the front of ritzr, ritzi, bounds | c | (in the case when NCONV < NEV.) | c %----------------------------------------------------% @@ -651,13 +652,13 @@ subroutine dsaup2 end if c c %------------------------------------% -c | Max iterations have been exceeded. | +c | Max iterations have been exceeded. | c %------------------------------------% c if (iter .gt. mxiter .and. nconv .lt. nev) info = 1 c c %---------------------% -c | No shifts to apply. | +c | No shifts to apply. | c %---------------------% c if (np .eq. 0 .and. nconv .lt. nev0) info = 2 @@ -681,20 +682,20 @@ subroutine dsaup2 nev = 2 end if np = kplusp - nev -c +c c %---------------------------------------% c | If the size of NEV was just increased | c | resort the eigenvalues. | c %---------------------------------------% -c - if (nevbef .lt. nev) +c + if (nevbef .lt. nev) & call dsgets (ishift, which, nev, np, ritz, bounds, & workl) c end if c if (msglvl .gt. 0) then - call ivout (logfil, 1, nconv, ndigit, + call ivout (logfil, 1, [nconv], ndigit, & '_saup2: no. of "converged" Ritz values at this iter.') if (msglvl .gt. 1) then kp(1) = nev @@ -708,7 +709,7 @@ subroutine dsaup2 end if end if -c +c if (ishift .eq. 0) then c c %-----------------------------------------------------% @@ -731,8 +732,8 @@ subroutine dsaup2 c %------------------------------------% c ushift = .false. -c -c +c +c c %---------------------------------------------------------% c | Move the NP shifts to the first NP locations of RITZ to | c | free up WORKL. This is for the non-exact shift case; | @@ -742,7 +743,7 @@ subroutine dsaup2 if (ishift .eq. 0) call dcopy (np, workl, 1, ritz, 1) c if (msglvl .gt. 2) then - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_saup2: The number of shifts to apply ') call dvout (logfil, np, workl, ndigit, & '_saup2: shifts selected') @@ -751,7 +752,7 @@ subroutine dsaup2 & '_saup2: corresponding Ritz estimates') end if end if -c +c c %---------------------------------------------------------% c | Apply the NP0 implicit shifts by QR bulge chasing. | c | Each shift is applied to the entire tridiagonal matrix. | @@ -770,36 +771,36 @@ subroutine dsaup2 c %---------------------------------------------% c cnorm = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call dcopy (n, resid, 1, workd(n+1), 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*RESID | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call dcopy (n, resid, 1, workd, 1) end if -c +c 100 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(1:N) := B*RESID | c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c - if (bmat .eq. 'G') then +c + if (bmat .eq. 'G') then rnorm = ddot (n, resid, 1, workd, 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then @@ -809,14 +810,14 @@ subroutine dsaup2 130 continue c if (msglvl .gt. 2) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_saup2: B-norm of residual for NEV factorization') call dvout (logfil, nev, h(1,2), ndigit, & '_saup2: main diagonal of compressed H matrix') call dvout (logfil, nev-1, h(2,1), ndigit, & '_saup2: subdiagonal of compressed H matrix') end if -c +c go to 1000 c c %---------------------------------------------------------------% @@ -824,12 +825,12 @@ subroutine dsaup2 c | E N D O F M A I N I T E R A T I O N L O O P | c | | c %---------------------------------------------------------------% -c +c 1100 continue c mxiter = iter nev = nconv -c +c 1200 continue ido = 99 c @@ -837,9 +838,9 @@ subroutine dsaup2 c | Error exit | c %------------% c - call second (t1) + call arscnd (t1) tsaup2 = t1 - t0 -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/dsaupd.f b/Toolbox/arpack-src/dsaupd.f index 7808e7432..81a0ce52c 100644 --- a/Toolbox/arpack-src/dsaupd.f +++ b/Toolbox/arpack-src/dsaupd.f @@ -1,33 +1,33 @@ c----------------------------------------------------------------------- c\BeginDoc c -c\Name: dsaupd +c\Name: dsaupd c -c\Description: +c\Description: c -c Reverse communication interface for the Implicitly Restarted Arnoldi -c Iteration. For symmetric problems this reduces to a variant of the Lanczos -c method. This method has been designed to compute approximations to a -c few eigenpairs of a linear operator OP that is real and symmetric -c with respect to a real positive semi-definite symmetric matrix B, +c Reverse communication interface for the Implicitly Restarted Arnoldi +c Iteration. For symmetric problems this reduces to a variant of the Lanczos +c method. This method has been designed to compute approximations to a +c few eigenpairs of a linear operator OP that is real and symmetric +c with respect to a real positive semi-definite symmetric matrix B, c i.e. -c -c B*OP = (OP`)*B. c -c Another way to express this condition is +c B*OP = (OP`)*B. +c +c Another way to express this condition is c c < x,OPy > = < OPx,y > where < z,w > = z`Bw . -c -c In the standard eigenproblem B is the identity matrix. +c +c In the standard eigenproblem B is the identity matrix. c ( A` denotes transpose of A) c c The computed approximate eigenvalues are called Ritz values and c the corresponding approximate eigenvectors are called Ritz vectors. c -c dsaupd is usually called iteratively to solve one of the +c dsaupd is usually called iteratively to solve one of the c following problems: c -c Mode 1: A*x = lambda*x, A symmetric +c Mode 1: A*x = lambda*x, A symmetric c ===> OP = A and B = I. c c Mode 2: A*x = lambda*M*x, A symmetric, M symmetric positive definite @@ -35,10 +35,10 @@ c ===> (If M can be factored see remark 3 below) c c Mode 3: K*x = lambda*M*x, K symmetric, M symmetric positive semi-definite -c ===> OP = (inv[K - sigma*M])*M and B = M. +c ===> OP = (inv[K - sigma*M])*M and B = M. c ===> Shift-and-Invert mode c -c Mode 4: K*x = lambda*KG*x, K symmetric positive semi-definite, +c Mode 4: K*x = lambda*KG*x, K symmetric positive semi-definite, c KG symmetric indefinite c ===> OP = (inv[K - sigma*KG])*K and B = K. c ===> Buckling mode @@ -60,13 +60,13 @@ c approximations. c c\Usage: -c call dsaupd +c call dsaupd c ( IDO, BMAT, N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, c IPNTR, WORKD, WORKL, LWORKL, INFO ) c c\Arguments c IDO Integer. (INPUT/OUTPUT) -c Reverse communication flag. IDO must be zero on the first +c Reverse communication flag. IDO must be zero on the first c call to dsaupd . IDO will be set internally to c indicate the type of operation to be performed. Control is c then given back to the calling routine which has the @@ -95,7 +95,7 @@ c placing the shifts. See remark 6 below. c IDO = 99: done c ------------------------------------------------------------- -c +c c BMAT Character*1. (INPUT) c BMAT specifies the type of the matrix B that defines the c semi-inner product for the operator OP. @@ -111,7 +111,7 @@ c 'LA' - compute the NEV largest (algebraic) eigenvalues. c 'SA' - compute the NEV smallest (algebraic) eigenvalues. c 'LM' - compute the NEV largest (in magnitude) eigenvalues. -c 'SM' - compute the NEV smallest (in magnitude) eigenvalues. +c 'SM' - compute the NEV smallest (in magnitude) eigenvalues. c 'BE' - compute NEV eigenvalues, half from each end of the c spectrum. When NEV is odd, compute one more from the c high end than from the low end. @@ -121,27 +121,27 @@ c Number of eigenvalues of OP to be computed. 0 < NEV < N. c c TOL Double precision scalar. (INPUT) -c Stopping criterion: the relative accuracy of the Ritz value +c Stopping criterion: the relative accuracy of the Ritz value c is considered acceptable if BOUNDS(I) .LE. TOL*ABS(RITZ(I)). c If TOL .LE. 0. is passed a default is set: c DEFAULT = DLAMCH ('EPS') (machine precision as computed c by the LAPACK auxiliary subroutine DLAMCH ). c c RESID Double precision array of length N. (INPUT/OUTPUT) -c On INPUT: +c On INPUT: c If INFO .EQ. 0, a random initial residual vector is used. c If INFO .NE. 0, RESID contains the initial residual vector, c possibly from a previous run. c On OUTPUT: -c RESID contains the final residual vector. +c RESID contains the final residual vector. c c NCV Integer. (INPUT) c Number of columns of the matrix V (less than or equal to N). -c This will indicate how many Lanczos vectors are generated -c at each iteration. After the startup phase in which NEV -c Lanczos vectors are generated, the algorithm generates +c This will indicate how many Lanczos vectors are generated +c at each iteration. After the startup phase in which NEV +c Lanczos vectors are generated, the algorithm generates c NCV-NEV Lanczos vectors at each subsequent update iteration. -c Most of the cost in generating each Lanczos vector is in the +c Most of the cost in generating each Lanczos vector is in the c matrix-vector product OP*x. (See remark 4 below). c c V Double precision N by NCV array. (OUTPUT) @@ -161,10 +161,10 @@ c the current tridiagonal matrix T are returned in c the part of WORKL array corresponding to RITZ. c See remark 6 below. -c ISHIFT = 1: exact shifts with respect to the reduced -c tridiagonal matrix T. This is equivalent to -c restarting the iteration with a starting vector -c that is a linear combination of Ritz vectors +c ISHIFT = 1: exact shifts with respect to the reduced +c tridiagonal matrix T. This is equivalent to +c restarting the iteration with a starting vector +c that is a linear combination of Ritz vectors c associated with the "wanted" Ritz values. c ------------------------------------------------------------- c @@ -172,8 +172,8 @@ c No longer referenced. See remark 2 below. c c IPARAM(3) = MXITER -c On INPUT: maximum number of Arnoldi update iterations allowed. -c On OUTPUT: actual number of Arnoldi update iterations taken. +c On INPUT: maximum number of Arnoldi update iterations allowed. +c On OUTPUT: actual number of Arnoldi update iterations taken. c c IPARAM(4) = NB: blocksize to be used in the recurrence. c The code currently works only for NB = 1. @@ -183,11 +183,11 @@ c the convergence criterion. c c IPARAM(6) = IUPD -c No longer referenced. Implicit restarting is ALWAYS used. +c No longer referenced. Implicit restarting is ALWAYS used. c c IPARAM(7) = MODE c On INPUT determines what type of eigenproblem is being solved. -c Must be 1,2,3,4,5; See under \Description of dsaupd for the +c Must be 1,2,3,4,5; See under \Description of dsaupd for the c five modes available. c c IPARAM(8) = NP @@ -199,7 +199,7 @@ c IPARAM(9) = NUMOP, IPARAM(10) = NUMOPB, IPARAM(11) = NUMREO, c OUTPUT: NUMOP = total number of OP*x operations, c NUMOPB = total number of B*x operations if BMAT='G', -c NUMREO = total number of steps of re-orthogonalization. +c NUMREO = total number of steps of re-orthogonalization. c c IPNTR Integer array of length 11. (OUTPUT) c Pointer to mark the starting locations in the WORKD and WORKL @@ -207,7 +207,7 @@ c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X in WORKD. c IPNTR(2): pointer to the current result vector Y in WORKD. -c IPNTR(3): pointer to the vector B * X in WORKD when used in +c IPNTR(3): pointer to the vector B * X in WORKD when used in c the shift-and-invert mode. c IPNTR(4): pointer to the next available location in WORKL c that is untouched by the program. @@ -224,14 +224,14 @@ c of the tridiagonal matrix T. Only referenced by c dseupd if RVEC = .TRUE. See Remarks. c ------------------------------------------------------------- -c +c c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The user should not use WORKD +c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration. Upon termination c WORKD(1:N) contains B*RESID(1:N). If the Ritz vectors are desired c subroutine dseupd uses this output. -c See Data Distribution Note below. +c See Data Distribution Note below. c c WORKL Double precision work array of length LWORKL. (OUTPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on @@ -247,13 +247,13 @@ c Error flag on output. c = 0: Normal exit. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. IPARAM(5) +c All possible eigenvalues of OP has been found. IPARAM(5) c returns the number of wanted converged Ritz values. c = 2: No longer an informational error. Deprecated starting c with release 2 of ARPACK. -c = 3: No shifts could be applied during a cycle of the -c Implicitly restarted Arnoldi iteration. One possibility -c is to increase the size of NCV relative to NEV. +c = 3: No shifts could be applied during a cycle of the +c Implicitly restarted Arnoldi iteration. One possibility +c is to increase the size of NCV relative to NEV. c See remark 4 below. c = -1: N must be positive. c = -2: NEV must be positive. @@ -267,9 +267,9 @@ c Informatinal error from LAPACK routine dsteqr . c = -9: Starting vector is zero. c = -10: IPARAM(7) must be 1,2,3,4,5. -c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatable. +c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatible. c = -12: IPARAM(1) must be equal to 0 or 1. -c = -13: NEV and WHICH = 'BE' are incompatable. +c = -13: NEV and WHICH = 'BE' are incompatible. c = -9999: Could not build an Arnoldi factorization. c IPARAM(5) returns the size of the current Arnoldi c factorization. The user is advised to check that @@ -277,12 +277,12 @@ c c c\Remarks -c 1. The converged Ritz values are always returned in ascending +c 1. The converged Ritz values are always returned in ascending c algebraic order. The computed Ritz values are approximate c eigenvalues of OP. The selection of WHICH should be made -c with this in mind when Mode = 3,4,5. After convergence, -c approximate eigenvalues of the original problem may be obtained -c with the ARPACK subroutine dseupd . +c with this in mind when Mode = 3,4,5. After convergence, +c approximate eigenvalues of the original problem may be obtained +c with the ARPACK subroutine dseupd . c c 2. If the Ritz vectors corresponding to the converged Ritz values c are needed, the user must call dseupd immediately following completion @@ -290,38 +290,38 @@ c c 3. If M can be factored into a Cholesky factorization M = LL` c then Mode = 2 should not be selected. Instead one should use -c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular +c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular c linear systems should be solved with L and L` rather c than computing inverses. After convergence, an approximate c eigenvector z of the original problem is recovered by solving c L`z = x where x is a Ritz vector of OP. c c 4. At present there is no a-priori analysis to guide the selection -c of NCV relative to NEV. The only formal requrement is that NCV > NEV. +c of NCV relative to NEV. The only formal requirement is that NCV > NEV. c However, it is recommended that NCV .ge. 2*NEV. If many problems of c the same type are to be solved, one should experiment with increasing -c NCV while keeping NEV fixed for a given test problem. This will +c NCV while keeping NEV fixed for a given test problem. This will c usually decrease the required number of OP*x operations but it c also increases the work and storage required to maintain the orthogonal c basis vectors. The optimal "cross-over" with respect to CPU time c is problem dependent and must be determined empirically. c -c 5. If IPARAM(7) = 2 then in the Reverse commuication interface the user +c 5. If IPARAM(7) = 2 then in the Reverse communication interface the user c must do the following. When IDO = 1, Y = OP * X is to be computed. c When IPARAM(7) = 2 OP = inv(B)*A. After computing A*X the user c must overwrite X with A*X. Y is then the solution to the linear set c of equations B*Y = A*X. c -c 6. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the -c NP = IPARAM(8) shifts in locations: -c 1 WORKL(IPNTR(11)) -c 2 WORKL(IPNTR(11)+1) -c . -c . -c . -c NP WORKL(IPNTR(11)+NP-1). +c 6. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the +c NP = IPARAM(8) shifts in locations: +c 1 WORKL(IPNTR(11)) +c 2 WORKL(IPNTR(11)+1) +c . +c . +c . +c NP WORKL(IPNTR(11)+NP-1). c -c The eigenvalues of the current tridiagonal matrix are located in +c The eigenvalues of the current tridiagonal matrix are located in c WORKL(IPNTR(6)) through WORKL(IPNTR(6)+NCV-1). They are in the c order defined by WHICH. The associated Ritz estimates are located in c WORKL(IPNTR(8)), WORKL(IPNTR(8)+1), ... , WORKL(IPNTR(8)+NCV-1). @@ -347,7 +347,7 @@ c REAL RESID(N), V(LDV,NCV), WORKD(N,3), WORKL(LWORKL) c SHARED RESID(BLOCK), V(BLOCK,:), WORKD(BLOCK,:) c REPLICATED WORKL(LWORKL) -c +c c c\BeginLib c @@ -355,7 +355,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall, @@ -365,8 +365,8 @@ c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to c Implement the Spectral Transformation", Math. Comp., 48 (1987), c pp 663-673. -c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos -c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", +c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos +c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", c SIAM J. Matr. Anal. Apps., January (1993). c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines c for Updating the QR decomposition", ACM TOMS, December 1990, @@ -380,7 +380,7 @@ c dstats ARPACK routine that initialize timing and other statistics c variables. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dvout ARPACK utility routine that prints vectors. c dlamch LAPACK routine that determines machine constants. c @@ -389,14 +389,14 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: -c 12/15/93: Version ' 2.4' +c 12/15/93: Version ' 2.4' c -c\SCCS Information: @(#) -c FILE: saupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 +c\SCCS Information: @(#) +c FILE: saupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 c c\Remarks c 1. None @@ -405,16 +405,16 @@ c c----------------------------------------------------------------------- c - subroutine dsaupd - & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, + subroutine dsaupd + & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, & ipntr, workd, workl, lworkl, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -422,7 +422,7 @@ subroutine dsaupd c character bmat*1, which*2 integer ido, info, ldv, lworkl, n, ncv, nev - Double precision + Double precision & tol c c %-----------------% @@ -430,14 +430,14 @@ subroutine dsaupd c %-----------------% c integer iparam(11), ipntr(11) - Double precision + Double precision & resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) c c %------------% c | Parameters | c %------------% c - Double precision + Double precision & one, zero parameter (one = 1.0D+0 , zero = 0.0D+0 ) c @@ -445,7 +445,7 @@ subroutine dsaupd c | Local Scalars | c %---------------% c - integer bounds, ierr, ih, iq, ishift, iupd, iw, + integer bounds, ierr, ih, iq, ishift, iupd, iw, & ldh, ldq, msglvl, mxiter, mode, nb, & nev0, next, np, ritz, j save bounds, ierr, ih, iq, ishift, iupd, iw, @@ -456,20 +456,20 @@ subroutine dsaupd c | External Subroutines | c %----------------------% c - external dsaup2 , dvout , ivout, second, dstats + external dsaup2 , dvout , ivout, arscnd, dstats c c %--------------------% c | External Functions | c %--------------------% c - Double precision - & dlamch - external dlamch + Double precision + & dlamch + external dlamch c c %-----------------------% c | Executable Statements | c %-----------------------% -c +c if (ido .eq. 0) then c c %-------------------------------% @@ -477,8 +477,8 @@ subroutine dsaupd c | & message level for debugging | c %-------------------------------% c - call dstats - call second (t0) + call dstats + call arscnd (t0) msglvl = msaupd c ierr = 0 @@ -512,7 +512,7 @@ subroutine dsaupd c %----------------------------------------------% c np = ncv - nev -c +c if (mxiter .le. 0) ierr = -4 if (which .ne. 'LM' .and. & which .ne. 'SM' .and. @@ -531,7 +531,7 @@ subroutine dsaupd else if (nev .eq. 1 .and. which .eq. 'BE') then ierr = -13 end if -c +c c %------------% c | Error Exit | c %------------% @@ -541,7 +541,7 @@ subroutine dsaupd ido = 99 go to 9000 end if -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -557,8 +557,8 @@ subroutine dsaupd c %----------------------------------------------% c np = ncv - nev - nev0 = nev -c + nev0 = nev +c c %-----------------------------% c | Zero out internal workspace | c %-----------------------------% @@ -566,7 +566,7 @@ subroutine dsaupd do 10 j = 1, ncv**2 + 8*ncv workl(j) = zero 10 continue -c +c c %-------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -599,7 +599,7 @@ subroutine dsaupd c | Carry out the Implicitly restarted Lanczos Iteration. | c %-------------------------------------------------------% c - call dsaup2 + call dsaup2 & ( ido, bmat, n, which, nev0, np, tol, resid, mode, iupd, & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritz), & workl(bounds), workl(iq), ldq, workl(iw), ipntr, workd, @@ -612,7 +612,7 @@ subroutine dsaupd c if (ido .eq. 3) iparam(8) = np if (ido .ne. 99) go to 9000 -c +c iparam(3) = mxiter iparam(5) = np iparam(9) = nopx @@ -628,19 +628,19 @@ subroutine dsaupd if (info .eq. 2) info = 3 c if (msglvl .gt. 0) then - call ivout (logfil, 1, mxiter, ndigit, + call ivout (logfil, 1, [mxiter], ndigit, & '_saupd: number of update iterations taken') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_saupd: number of "converged" Ritz values') - call dvout (logfil, np, workl(Ritz), ndigit, + call dvout (logfil, np, workl(Ritz), ndigit, & '_saupd: final Ritz values') - call dvout (logfil, np, workl(Bounds), ndigit, + call dvout (logfil, np, workl(Bounds), ndigit, & '_saupd: corresponding error bounds') - end if + end if c - call second (t1) + call arscnd (t1) tsaupd = t1 - t0 -c +c if (msglvl .gt. 0) then c c %--------------------------------------------------------% @@ -678,9 +678,9 @@ subroutine dsaupd & 5x, 'Total time in applying the shifts = ', f12.6,/ & 5x, 'Total time in convergence testing = ', f12.6) end if -c +c 9000 continue -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/dsconv.f b/Toolbox/arpack-src/dsconv.f index 888a1dab8..82dbb6e61 100644 --- a/Toolbox/arpack-src/dsconv.f +++ b/Toolbox/arpack-src/dsconv.f @@ -3,7 +3,7 @@ c c\Name: dsconv c -c\Description: +c\Description: c Convergence testing for the symmetric Arnoldi eigenvalue routine. c c\Usage: @@ -34,23 +34,23 @@ c\BeginLib c c\Routines called: -c second ARPACK utility routine for timing. -c dlamch LAPACK routine that determines machine constants. +c arscnd ARPACK utility routine for timing. +c dlamch LAPACK routine that determines machine constants. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sconv.F SID: 2.4 DATE OF SID: 4/19/96 RELEASE: 2 c c\Remarks c 1. Starting with version 2.4, this routine no longer uses the -c Parlett strategy using the gap conditions. +c Parlett strategy using the gap conditions. c c\EndLib c @@ -62,8 +62,8 @@ subroutine dsconv (n, ritz, bounds, tol, nconv) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -106,9 +106,9 @@ subroutine dsconv (n, ritz, bounds, tol, nconv) c | Executable Statements | c %-----------------------% c - call second (t0) + call arscnd (t0) c - eps23 = dlamch('Epsilon-Machine') + eps23 = dlamch('Epsilon-Machine') eps23 = eps23**(2.0D+0 / 3.0D+0) c nconv = 0 @@ -125,10 +125,10 @@ subroutine dsconv (n, ritz, bounds, tol, nconv) end if c 10 continue -c - call second (t1) +c + call arscnd (t1) tsconv = tsconv + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/dseigt.f b/Toolbox/arpack-src/dseigt.f index fa031b090..5e20c805b 100644 --- a/Toolbox/arpack-src/dseigt.f +++ b/Toolbox/arpack-src/dseigt.f @@ -3,7 +3,7 @@ c c\Name: dseigt c -c\Description: +c\Description: c Compute the eigenvalues of the current symmetric tridiagonal matrix c and the corresponding error bounds given the current residual norm. c @@ -20,16 +20,16 @@ c Size of the symmetric tridiagonal matrix H. c c H Double precision N by 2 array. (INPUT) -c H contains the symmetric tridiagonal matrix with the -c subdiagonal in the first column starting at H(2,1) and the +c H contains the symmetric tridiagonal matrix with the +c subdiagonal in the first column starting at H(2,1) and the c main diagonal in second column. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c EIG Double precision array of length N. (OUTPUT) -c On output, EIG contains the N eigenvalues of H possibly +c On output, EIG contains the N eigenvalues of H possibly c unsorted. The BOUNDS arrays are returned in the c same sorted order as EIG. c @@ -59,22 +59,22 @@ c dstqrb ARPACK routine that computes the eigenvalues and the c last components of the eigenvectors of a symmetric c and tridiagonal matrix. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dvout ARPACK utility routine that prints vectors. c dcopy Level 1 BLAS that copies one vector to another. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: seigt.F SID: 2.4 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks @@ -84,15 +84,15 @@ c c----------------------------------------------------------------------- c - subroutine dseigt + subroutine dseigt & ( rnorm, n, h, ldh, eig, bounds, workl, ierr ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -127,7 +127,7 @@ subroutine dseigt c | External Subroutines | c %----------------------% c - external dcopy, dstqrb, dvout, second + external dcopy, dstqrb, dvout, arscnd c c %-----------------------% c | Executable Statements | @@ -136,9 +136,9 @@ subroutine dseigt c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | -c %-------------------------------% +c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mseigt c if (msglvl .gt. 0) then @@ -167,8 +167,8 @@ subroutine dseigt do 30 k = 1, n bounds(k) = rnorm*abs(bounds(k)) 30 continue -c - call second (t1) +c + call arscnd (t1) tseigt = tseigt + (t1 - t0) c 9000 continue diff --git a/Toolbox/arpack-src/dsesrt.f b/Toolbox/arpack-src/dsesrt.f index 2b4ca8cbc..833fba4e6 100644 --- a/Toolbox/arpack-src/dsesrt.f +++ b/Toolbox/arpack-src/dsesrt.f @@ -4,7 +4,7 @@ c\Name: dsesrt c c\Description: -c Sort the array X in the order specified by WHICH and optionally +c Sort the array X in the order specified by WHICH and optionally c apply the permutation to the columns of the matrix A. c c\Usage: @@ -32,7 +32,7 @@ c Number of rows of the matrix A. c c A Double precision array of length NA by N. (INPUT/OUTPUT) -c +c c LDA Integer. (INPUT) c Leading dimension of A. c @@ -47,18 +47,18 @@ c c\Authors c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c 12/15/93: Version ' 2.1'. -c Adapted from the sort routine in LANSO and +c Adapted from the sort routine in LANSO and c the ARPACK code dsortr c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sesrt.F SID: 2.3 DATE OF SID: 4/19/96 RELEASE: 2 c c\EndLib @@ -101,7 +101,7 @@ subroutine dsesrt (which, apply, n, x, na, a, lda) c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'SA') then c c X is sorted into decreasing order of algebraic. @@ -165,7 +165,7 @@ subroutine dsesrt (which, apply, n, x, na, a, lda) 80 continue c if (j.lt.0) go to 90 -c +c if (x(j).gt.x(j+igap)) then temp = x(j) x(j) = x(j+igap) @@ -179,7 +179,7 @@ subroutine dsesrt (which, apply, n, x, na, a, lda) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'LM') then c c X is sorted into increasing order of magnitude. diff --git a/Toolbox/arpack-src/dseupd.f b/Toolbox/arpack-src/dseupd.f index 291ae1f85..ae123a207 100644 --- a/Toolbox/arpack-src/dseupd.f +++ b/Toolbox/arpack-src/dseupd.f @@ -1,8 +1,8 @@ c\BeginDoc c -c\Name: dseupd +c\Name: dseupd c -c\Description: +c\Description: c c This subroutine returns the converged approximations to eigenvalues c of A*z = lambda*B*z and (optionally): @@ -15,22 +15,22 @@ c (3) Both. c c There is negligible additional cost to obtain eigenvectors. An orthonormal -c (Lanczos) basis is always computed. There is an additional storage cost -c of n*nev if both are requested (in this case a separate array Z must be +c (Lanczos) basis is always computed. There is an additional storage cost +c of n*nev if both are requested (in this case a separate array Z must be c supplied). c c These quantities are obtained from the Lanczos factorization computed c by DSAUPD for the linear operator OP prescribed by the MODE selection c (see IPARAM(7) in DSAUPD documentation.) DSAUPD must be called before -c this routine is called. These approximate eigenvalues and vectors are -c commonly called Ritz values and Ritz vectors respectively. They are -c referred to as such in the comments that follow. The computed orthonormal -c basis for the invariant subspace corresponding to these Ritz values is +c this routine is called. These approximate eigenvalues and vectors are +c commonly called Ritz values and Ritz vectors respectively. They are +c referred to as such in the comments that follow. The computed orthonormal +c basis for the invariant subspace corresponding to these Ritz values is c referred to as a Lanczos basis. c -c See documentation in the header of the subroutine DSAUPD for a definition -c of OP as well as other terms and the relation of computed Ritz values -c and vectors of OP with respect to the given problem A*z = lambda*B*z. +c See documentation in the header of the subroutine DSAUPD for a definition +c of OP as well as other terms and the relation of computed Ritz values +c and vectors of OP with respect to the given problem A*z = lambda*B*z. c c The approximate eigenvalues of the original problem are returned in c ascending algebraic order. The user may elect to call this routine @@ -39,19 +39,19 @@ c with a single call. c c\Usage: -c call dseupd +c call dseupd c ( RVEC, HOWMNY, SELECT, D, Z, LDZ, SIGMA, BMAT, N, WHICH, NEV, TOL, c RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, WORKL, LWORKL, INFO ) c -c RVEC LOGICAL (INPUT) -c Specifies whether Ritz vectors corresponding to the Ritz value +c RVEC LOGICAL (INPUT) +c Specifies whether Ritz vectors corresponding to the Ritz value c approximations to the eigenproblem A*z = lambda*B*z are computed. c c RVEC = .FALSE. Compute Ritz values only. c c RVEC = .TRUE. Compute Ritz vectors. c -c HOWMNY Character*1 (INPUT) +c HOWMNY Character*1 (INPUT) c Specifies how many Ritz vectors are wanted and the form of Z c the matrix of Ritz vectors. See remark 1 below. c = 'A': compute NEV Ritz vectors; @@ -61,7 +61,7 @@ c SELECT Logical array of dimension NCV. (INPUT/WORKSPACE) c If HOWMNY = 'S', SELECT specifies the Ritz vectors to be c computed. To select the Ritz vector corresponding to a -c Ritz value D(j), SELECT(j) must be set to .TRUE.. +c Ritz value D(j), SELECT(j) must be set to .TRUE.. c If HOWMNY = 'A' , SELECT is used as a workspace for c reordering the Ritz values. c @@ -70,8 +70,8 @@ c eigenvalues of A*z = lambda*B*z. The values are returned c in ascending order. If IPARAM(7) = 3,4,5 then D represents c the Ritz values of OP computed by dsaupd transformed to -c those of the original eigensystem A*z = lambda*B*z. If -c IPARAM(7) = 1,2 then the Ritz values of OP are the same +c those of the original eigensystem A*z = lambda*B*z. If +c IPARAM(7) = 1,2 then the Ritz values of OP are the same c as the those of A*z = lambda*B*z. c c Z Double precision N by NEV array if HOWMNY = 'A'. (OUTPUT) @@ -79,7 +79,7 @@ c eigensystem A*z = lambda*B*z corresponding to the Ritz c value approximations. c If RVEC = .FALSE. then Z is not referenced. -c NOTE: The array Z may be set equal to first NEV columns of the +c NOTE: The array Z may be set equal to first NEV columns of the c Arnoldi/Lanczos basis array V computed by DSAUPD . c c LDZ Integer. (INPUT) @@ -144,7 +144,7 @@ c = -17: DSEUPD got a different count of the number of converged c Ritz values than DSAUPD got. This indicates the user c probably made an error in passing data from DSAUPD to -c DSEUPD or that the data was modified before entering +c DSEUPD or that the data was modified before entering c DSEUPD . c c\BeginLib @@ -153,7 +153,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall, @@ -163,19 +163,19 @@ c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to c Implement the Spectral Transformation", Math. Comp., 48 (1987), c pp 663-673. -c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos -c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", +c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos +c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", c SIAM J. Matr. Anal. Apps., January (1993). c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines c for Updating the QR decomposition", ACM TOMS, December 1990, c Volume 16 Number 4, pp 369-377. c c\Remarks -c 1. The converged Ritz values are always returned in increasing +c 1. The converged Ritz values are always returned in increasing c (algebraic) order. c c 2. Currently only HOWMNY = 'A' is implemented. It is included at this -c stage for the user who wants to incorporate it. +c stage for the user who wants to incorporate it. c c\Routines called: c dsesrt ARPACK routine that sorts an array X, and applies the @@ -201,15 +201,15 @@ c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Chao Yang Houston, Texas -c Dept. of Computational & +c Dept. of Computational & c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c 12/15/93: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: seupd.F SID: 2.11 DATE OF SID: 04/10/01 RELEASE: 2 c c\EndLib @@ -226,8 +226,8 @@ subroutine dseupd (rvec , howmny, select, d , c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -236,7 +236,7 @@ subroutine dseupd (rvec , howmny, select, d , character bmat, howmny, which*2 logical rvec integer info, ldz, ldv, lworkl, n, ncv, nev - Double precision + Double precision & sigma, tol c c %-----------------% @@ -245,7 +245,7 @@ subroutine dseupd (rvec , howmny, select, d , c integer iparam(7), ipntr(11) logical select(ncv) - Double precision + Double precision & d(nev) , resid(n) , v(ldv,ncv), & z(ldz, nev), workd(2*n), workl(lworkl) c @@ -253,7 +253,7 @@ subroutine dseupd (rvec , howmny, select, d , c | Parameters | c %------------% c - Double precision + Double precision & one, zero parameter (one = 1.0D+0 , zero = 0.0D+0 ) c @@ -267,7 +267,7 @@ subroutine dseupd (rvec , howmny, select, d , & ldq , mode , msglvl, nconv , next , & ritz , irz , ibd , np , ishift, & leftptr, rghtptr, numcnv, jj - Double precision + Double precision & bnorm2 , rnorm, temp, temp1, eps23 logical reord c @@ -275,16 +275,16 @@ subroutine dseupd (rvec , howmny, select, d , c | External Subroutines | c %----------------------% c - external dcopy , dger , dgeqr2 , dlacpy , dorm2r , dscal , - & dsesrt , dsteqr , dswap , dvout , ivout , dsortr + external dcopy , dger , dgeqr2 , dlacpy , dorm2r , dscal , + & dsesrt , dsteqr , dswap , dvout , ivout , dsortr c c %--------------------% c | External Functions | c %--------------------% c - Double precision - & dnrm2 , dlamch - external dnrm2 , dlamch + Double precision + & dnrm2 , dlamch + external dnrm2 , dlamch c c %---------------------% c | Intrinsic Functions | @@ -295,7 +295,7 @@ subroutine dseupd (rvec , howmny, select, d , c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -312,7 +312,7 @@ subroutine dseupd (rvec , howmny, select, d , if (nconv .eq. 0) go to 9000 ierr = 0 c - if (nconv .le. 0) ierr = -14 + if (nconv .le. 0) ierr = -14 if (n .le. 0) ierr = -1 if (nev .le. 0) ierr = -2 if (ncv .le. nev .or. ncv .gt. n) ierr = -3 @@ -324,12 +324,12 @@ subroutine dseupd (rvec , howmny, select, d , if (bmat .ne. 'I' .and. bmat .ne. 'G') ierr = -6 if ( (howmny .ne. 'A' .and. & howmny .ne. 'P' .and. - & howmny .ne. 'S') .and. rvec ) + & howmny .ne. 'S') .and. rvec ) & ierr = -15 if (rvec .and. howmny .eq. 'S') ierr = -16 c if (rvec .and. lworkl .lt. ncv**2+8*ncv) ierr = -7 -c +c if (mode .eq. 1 .or. mode .eq. 2) then type = 'REGULR' else if (mode .eq. 3 ) then @@ -338,7 +338,7 @@ subroutine dseupd (rvec , howmny, select, d , type = 'BUCKLE' else if (mode .eq. 5 ) then type = 'CAYLEY' - else + else ierr = -10 end if if (mode .eq. 1 .and. bmat .eq. 'G') ierr = -11 @@ -352,7 +352,7 @@ subroutine dseupd (rvec , howmny, select, d , info = ierr go to 9000 end if -c +c c %-------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -427,7 +427,7 @@ subroutine dseupd (rvec , howmny, select, d , c | Set machine dependent constant. | c %---------------------------------% c - eps23 = dlamch ('Epsilon-Machine') + eps23 = dlamch ('Epsilon-Machine') eps23 = eps23**(2.0D+0 / 3.0D+0 ) c c %---------------------------------------% @@ -501,7 +501,7 @@ subroutine dseupd (rvec , howmny, select, d , & workl(ibd+jj-1) .le. tol*temp1) then select(jj) = .true. numcnv = numcnv + 1 - if (jj .gt. nev) reord = .true. + if (jj .gt. nconv) reord = .true. endif 11 continue c @@ -513,9 +513,9 @@ subroutine dseupd (rvec , howmny, select, d , c %-----------------------------------------------------------% c if (msglvl .gt. 2) then - call ivout(logfil, 1, numcnv, ndigit, + call ivout(logfil, 1, [numcnv], ndigit, & '_seupd: Number of specified eigenvalues') - call ivout(logfil, 1, nconv, ndigit, + call ivout(logfil, 1, [nconv], ndigit, & '_seupd: Number of "converged" eigenvalues') end if c @@ -609,9 +609,9 @@ subroutine dseupd (rvec , howmny, select, d , c if (leftptr .lt. rghtptr) go to 20 c - 30 end if + end if c - if (msglvl .gt. 2) then + 30 if (msglvl .gt. 2) then call dvout (logfil, ncv, workl(ihd), ndigit, & '_seupd: The eigenvalues of H--reordered') end if @@ -652,8 +652,8 @@ subroutine dseupd (rvec , howmny, select, d , call dcopy (ncv, workl(bounds), 1, workl(ihb), 1) end if c - else -c + else +c c %-------------------------------------------------------------% c | * Make a copy of all the Ritz values. | c | * Transform the Ritz values back to the original system. | @@ -670,13 +670,13 @@ subroutine dseupd (rvec , howmny, select, d , c %-------------------------------------------------------------% c call dcopy (ncv, workl(ihd), 1, workl(iw), 1) - if (type .eq. 'SHIFTI') then + if (type .eq. 'SHIFTI') then do 40 k=1, ncv workl(ihd+k-1) = one / workl(ihd+k-1) + sigma 40 continue else if (type .eq. 'BUCKLE') then do 50 k=1, ncv - workl(ihd+k-1) = sigma * workl(ihd+k-1) / + workl(ihd+k-1) = sigma * workl(ihd+k-1) / & (workl(ihd+k-1) - one) 50 continue else if (type .eq. 'CAYLEY') then @@ -685,7 +685,7 @@ subroutine dseupd (rvec , howmny, select, d , & (workl(ihd+k-1) - one) 60 continue end if -c +c c %-------------------------------------------------------------% c | * Store the wanted NCONV lambda values into D. | c | * Sort the NCONV wanted lambda in WORKL(IHD:IHD+NCONV-1) | @@ -711,8 +711,8 @@ subroutine dseupd (rvec , howmny, select, d , call dsortr ('LA', .true., nconv, d, workl(ihb)) end if c - end if -c + end if +c c %------------------------------------------------% c | Compute the Ritz vectors. Transform the wanted | c | eigenvectors of the symmetric tridiagonal H by | @@ -720,25 +720,25 @@ subroutine dseupd (rvec , howmny, select, d , c %------------------------------------------------% c if (rvec .and. howmny .eq. 'A') then -c +c c %----------------------------------------------------------% c | Compute the QR factorization of the matrix representing | c | the wanted invariant subspace located in the first NCONV | c | columns of workl(iq,ldq). | c %----------------------------------------------------------% -c +c call dgeqr2 (ncv, nconv , workl(iq) , & ldq, workl(iw+ncv), workl(ihb), & ierr) c c %--------------------------------------------------------% -c | * Postmultiply V by Q. | +c | * Postmultiply V by Q. | c | * Copy the first NCONV columns of VQ into Z. | c | The N by NCONV matrix Z is now a matrix representation | c | of the approximate invariant subspace associated with | c | the Ritz values in workl(ihd). | c %--------------------------------------------------------% -c +c call dorm2r ('Right', 'Notranspose', n , & ncv , nconv , workl(iq), & ldq , workl(iw+ncv), v , @@ -752,7 +752,7 @@ subroutine dseupd (rvec , howmny, select, d , c %-----------------------------------------------------% c do 65 j = 1, ncv-1 - workl(ihb+j-1) = zero + workl(ihb+j-1) = zero 65 continue workl(ihb+ncv-1) = one call dorm2r ('Left', 'Transpose' , ncv , @@ -760,6 +760,16 @@ subroutine dseupd (rvec , howmny, select, d , & ldq , workl(iw+ncv), workl(ihb), & ncv , temp , ierr) c +c %-----------------------------------------------------% +c | Make a copy of the last row into | +c | workl(iw+ncv:iw+2*ncv), as it is needed again in | +c | the Ritz vector purification step below | +c %-----------------------------------------------------% +c + do 67 j = 1, nconv + workl(iw+ncv+j-1) = workl(ihb+j-1) + 67 continue + else if (rvec .and. howmny .eq. 'S') then c c Not yet implemented. See remark 2 above. @@ -784,10 +794,10 @@ subroutine dseupd (rvec , howmny, select, d , c %-------------------------------------------------% c call dscal (ncv, bnorm2, workl(ihb), 1) - if (type .eq. 'SHIFTI') then + if (type .eq. 'SHIFTI') then c do 80 k=1, ncv - workl(ihb+k-1) = abs( workl(ihb+k-1) ) + workl(ihb+k-1) = abs( workl(ihb+k-1) ) & / workl(iw+k-1)**2 80 continue c @@ -812,15 +822,15 @@ subroutine dseupd (rvec , howmny, select, d , if (type .ne. 'REGULR' .and. msglvl .gt. 1) then call dvout (logfil, nconv, d, ndigit, & '_seupd: Untransformed converged Ritz values') - call dvout (logfil, nconv, workl(ihb), ndigit, + call dvout (logfil, nconv, workl(ihb), ndigit, & '_seupd: Ritz estimates of the untransformed Ritz values') else if (msglvl .gt. 1) then call dvout (logfil, nconv, d, ndigit, & '_seupd: Converged Ritz values') - call dvout (logfil, nconv, workl(ihb), ndigit, + call dvout (logfil, nconv, workl(ihb), ndigit, & '_seupd: Associated Ritz estimates') end if -c +c c %-------------------------------------------------% c | Ritz vector purification step. Formally perform | c | one of inverse subspace iteration. Only used | @@ -830,20 +840,20 @@ subroutine dseupd (rvec , howmny, select, d , if (rvec .and. (type .eq. 'SHIFTI' .or. type .eq. 'CAYLEY')) then c do 110 k=0, nconv-1 - workl(iw+k) = workl(iq+k*ldq+ncv-1) + workl(iw+k) = workl(iw+ncv+k) & / workl(iw+k) 110 continue c else if (rvec .and. type .eq. 'BUCKLE') then c do 120 k=0, nconv-1 - workl(iw+k) = workl(iq+k*ldq+ncv-1) + workl(iw+k) = workl(iw+ncv+k) & / (workl(iw+k)-one) 120 continue c - end if + end if c - if (type .ne. 'REGULR') + if (rvec .and. type .ne. 'REGULR') & call dger (n, nconv, one, resid, 1, workl(iw), 1, z, ldz) c 9000 continue diff --git a/Toolbox/arpack-src/dsgets.f b/Toolbox/arpack-src/dsgets.f index b51143d01..436a4fe84 100644 --- a/Toolbox/arpack-src/dsgets.f +++ b/Toolbox/arpack-src/dsgets.f @@ -3,13 +3,13 @@ c c\Name: dsgets c -c\Description: +c\Description: c Given the eigenvalues of the symmetric tridiagonal matrix H, -c computes the NP shifts AMU that are zeros of the polynomial of -c degree NP which filters out components of the unwanted eigenvectors +c computes the NP shifts AMU that are zeros of the polynomial of +c degree NP which filters out components of the unwanted eigenvectors c corresponding to the AMU's based on some given criteria. c -c NOTE: This is called even in the case of user specified shifts in +c NOTE: This is called even in the case of user specified shifts in c order to sort the eigenvalues, and error bounds of H for later use. c c\Usage: @@ -39,8 +39,8 @@ c c RITZ Double precision array of length KEV+NP. (INPUT/OUTPUT) c On INPUT, RITZ contains the eigenvalues of H. -c On OUTPUT, RITZ are sorted so that the unwanted eigenvalues -c are in the first NP locations and the wanted part is in +c On OUTPUT, RITZ are sorted so that the unwanted eigenvalues +c are in the first NP locations and the wanted part is in c the last KEV locations. When exact shifts are selected, the c unwanted part corresponds to the shifts to be applied. c @@ -49,7 +49,7 @@ c c SHIFTS Double precision array of length NP. (INPUT/OUTPUT) c On INPUT: contains the user specified shifts if ISHIFT = 0. -c On OUTPUT: contains the shifts sorted into decreasing order +c On OUTPUT: contains the shifts sorted into decreasing order c of magnitude with respect to the Ritz estimates contained in c BOUNDS. If ISHIFT = 0, SHIFTS is not modified on exit. c @@ -65,7 +65,7 @@ c\Routines called: c dsortr ARPACK utility sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c dvout ARPACK utility routine that prints vectors. c dcopy Level 1 BLAS that copies one vector to another. c dswap Level 1 BLAS that swaps the contents of two vectors. @@ -75,13 +75,13 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/93: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sgets.F SID: 2.4 DATE OF SID: 4/19/96 RELEASE: 2 c c\Remarks @@ -96,8 +96,8 @@ subroutine dsgets ( ishift, which, kev, np, ritz, bounds, shifts ) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -131,7 +131,7 @@ subroutine dsgets ( ishift, which, kev, np, ritz, bounds, shifts ) c | External Subroutines | c %----------------------% c - external dswap, dcopy, dsortr, second + external dswap, dcopy, dsortr, arscnd c c %---------------------% c | Intrinsic Functions | @@ -142,15 +142,15 @@ subroutine dsgets ( ishift, which, kev, np, ritz, bounds, shifts ) c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msgets -c +c if (which .eq. 'BE') then c c %-----------------------------------------------------% @@ -163,11 +163,11 @@ subroutine dsgets ( ishift, which, kev, np, ritz, bounds, shifts ) c %-----------------------------------------------------% c call dsortr ('LA', .true., kev+np, ritz, bounds) - kevd2 = kev / 2 + kevd2 = kev / 2 if ( kev .gt. 1 ) then - call dswap ( min(kevd2,np), ritz, 1, + call dswap ( min(kevd2,np), ritz, 1, & ritz( max(kevd2,np)+1 ), 1) - call dswap ( min(kevd2,np), bounds, 1, + call dswap ( min(kevd2,np), bounds, 1, & bounds( max(kevd2,np)+1 ), 1) end if c @@ -185,7 +185,7 @@ subroutine dsgets ( ishift, which, kev, np, ritz, bounds, shifts ) end if c if (ishift .eq. 1 .and. np .gt. 0) then -c +c c %-------------------------------------------------------% c | Sort the unwanted Ritz values used as shifts so that | c | the ones with largest Ritz estimates are first. | @@ -193,23 +193,23 @@ subroutine dsgets ( ishift, which, kev, np, ritz, bounds, shifts ) c | forward instability of the iteration when the shifts | c | are applied in subroutine dsapps. | c %-------------------------------------------------------% -c +c call dsortr ('SM', .true., np, bounds, ritz) call dcopy (np, ritz, 1, shifts, 1) end if -c - call second (t1) +c + call arscnd (t1) tsgets = tsgets + (t1 - t0) c if (msglvl .gt. 0) then - call ivout (logfil, 1, kev, ndigit, '_sgets: KEV is') - call ivout (logfil, 1, np, ndigit, '_sgets: NP is') + call ivout (logfil, 1, [kev], ndigit, '_sgets: KEV is') + call ivout (logfil, 1, [np], ndigit, '_sgets: NP is') call dvout (logfil, kev+np, ritz, ndigit, & '_sgets: Eigenvalues of current H matrix') - call dvout (logfil, kev+np, bounds, ndigit, + call dvout (logfil, kev+np, bounds, ndigit, & '_sgets: Associated Ritz estimates') end if -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/dsortc.f b/Toolbox/arpack-src/dsortc.f index 91af30f8a..42baae2ba 100644 --- a/Toolbox/arpack-src/dsortc.f +++ b/Toolbox/arpack-src/dsortc.f @@ -4,7 +4,7 @@ c\Name: dsortc c c\Description: -c Sorts the complex array in XREAL and XIMAG into the order +c Sorts the complex array in XREAL and XIMAG into the order c specified by WHICH and optionally applies the permutation to the c real array Y. It is assumed that if an element of XIMAG is c nonzero, then its negative is also an element. In other words, @@ -49,14 +49,14 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c Adapted from the sort routine in LANSO. c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sortc.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\EndLib @@ -77,7 +77,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) c | Array Arguments | c %-----------------% c - Double precision + Double precision & xreal(0:n-1), ximag(0:n-1), y(0:n-1) c c %---------------% @@ -85,14 +85,14 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) c %---------------% c integer i, igap, j - Double precision + Double precision & temp, temp1, temp2 c c %--------------------% c | External Functions | c %--------------------% c - Double precision + Double precision & dlapy2 external dlapy2 c @@ -101,7 +101,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'LM') then c c %------------------------------------------------------% @@ -169,7 +169,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -183,7 +183,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) 60 continue igap = igap / 2 go to 40 -c +c else if (which .eq. 'LR') then c c %------------------------------------------------% @@ -207,7 +207,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -221,7 +221,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'SR') then c c %------------------------------------------------% @@ -244,7 +244,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -258,7 +258,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) 120 continue igap = igap / 2 go to 100 -c +c else if (which .eq. 'LI') then c c %------------------------------------------------% @@ -281,7 +281,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -295,7 +295,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) 150 continue igap = igap / 2 go to 130 -c +c else if (which .eq. 'SI') then c c %------------------------------------------------% @@ -318,7 +318,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -333,7 +333,7 @@ subroutine dsortc (which, apply, n, xreal, ximag, y) igap = igap / 2 go to 160 end if -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/dsortr.f b/Toolbox/arpack-src/dsortr.f index 3903b81c5..b44f916cf 100644 --- a/Toolbox/arpack-src/dsortr.f +++ b/Toolbox/arpack-src/dsortr.f @@ -4,7 +4,7 @@ c\Name: dsortr c c\Description: -c Sort the array X1 in the order specified by WHICH and optionally +c Sort the array X1 in the order specified by WHICH and optionally c applies the permutation to the array X2. c c\Usage: @@ -39,17 +39,17 @@ c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c 12/16/93: Version ' 2.1'. c Adapted from the sort routine in LANSO. c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sortr.F SID: 2.3 DATE OF SID: 4/19/96 RELEASE: 2 c c\EndLib @@ -86,7 +86,7 @@ subroutine dsortr (which, apply, n, x1, x2) c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'SA') then c c X1 is sorted into decreasing order of algebraic. @@ -158,7 +158,7 @@ subroutine dsortr (which, apply, n, x1, x2) 80 continue c if (j.lt.0) go to 90 -c +c if (x1(j).gt.x1(j+igap)) then temp = x1(j) x1(j) = x1(j+igap) @@ -176,7 +176,7 @@ subroutine dsortr (which, apply, n, x1, x2) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'LM') then c c X1 is sorted into increasing order of magnitude. diff --git a/Toolbox/arpack-src/dstatn.f b/Toolbox/arpack-src/dstatn.f index 1bf371459..d09d8a371 100644 --- a/Toolbox/arpack-src/dstatn.f +++ b/Toolbox/arpack-src/dstatn.f @@ -9,10 +9,10 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: statn.F SID: 2.4 DATE OF SID: 4/20/96 RELEASE: 2 c subroutine dstatn @@ -21,8 +21,8 @@ subroutine dstatn c | See stat.doc for documentation | c %--------------------------------% c - include 'stat.fi' -c + include 'stat.h' +c c %-----------------------% c | Executable Statements | c %-----------------------% @@ -32,7 +32,7 @@ subroutine dstatn nrorth = 0 nitref = 0 nrstrt = 0 -c +c tnaupd = 0.0D+0 tnaup2 = 0.0D+0 tnaitr = 0.0D+0 @@ -43,14 +43,14 @@ subroutine dstatn titref = 0.0D+0 tgetv0 = 0.0D+0 trvec = 0.0D+0 -c +c c %----------------------------------------------------% c | User time including reverse communication overhead | c %----------------------------------------------------% c tmvopx = 0.0D+0 tmvbx = 0.0D+0 -c +c return c c diff --git a/Toolbox/arpack-src/dstats.f b/Toolbox/arpack-src/dstats.f index f1ad95d99..cb1b3f38d 100644 --- a/Toolbox/arpack-src/dstats.f +++ b/Toolbox/arpack-src/dstats.f @@ -1,18 +1,18 @@ c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: stats.F SID: 2.1 DATE OF SID: 4/19/96 RELEASE: 2 c %---------------------------------------------% c | Initialize statistic and timing information | c | for symmetric Arnoldi code. | c %---------------------------------------------% - + subroutine dstats c %--------------------------------% c | See stat.doc for documentation | c %--------------------------------% - include 'stat.fi' - + include 'stat.h' + c %-----------------------% c | Executable Statements | c %-----------------------% @@ -22,7 +22,7 @@ subroutine dstats nrorth = 0 nitref = 0 nrstrt = 0 - + tsaupd = 0.0D+0 tsaup2 = 0.0D+0 tsaitr = 0.0D+0 @@ -33,13 +33,13 @@ subroutine dstats titref = 0.0D+0 tgetv0 = 0.0D+0 trvec = 0.0D+0 - + c %----------------------------------------------------% c | User time including reverse communication overhead | c %----------------------------------------------------% tmvopx = 0.0D+0 tmvbx = 0.0D+0 - + return c c End of dstats diff --git a/Toolbox/arpack-src/dstqrb.f b/Toolbox/arpack-src/dstqrb.f index 9fef543ba..d55a59a2d 100644 --- a/Toolbox/arpack-src/dstqrb.f +++ b/Toolbox/arpack-src/dstqrb.f @@ -32,13 +32,13 @@ c On exit, E has been destroyed. c c Z Double precision array, dimension (N). (OUTPUT) -c On exit, Z contains the last row of the orthonormal -c eigenvector matrix of the symmetric tridiagonal matrix. +c On exit, Z contains the last row of the orthonormal +c eigenvector matrix of the symmetric tridiagonal matrix. c If an error exit is made, Z contains the last row of the c eigenvector matrix associated with the stored eigenvalues. c c WORK Double precision array, dimension (max(1,2*N-2)). (WORKSPACE) -c Workspace used in accumulating the transformation for +c Workspace used in accumulating the transformation for c computing the last components of the eigenvectors. c c INFO Integer. (OUTPUT) @@ -62,9 +62,9 @@ c dcopy Level 1 BLAS that copies one vector to another. c dswap Level 1 BLAS that swaps the contents of two vectors. c lsame LAPACK character comparison routine. -c dlae2 LAPACK routine that computes the eigenvalues of a 2-by-2 +c dlae2 LAPACK routine that computes the eigenvalues of a 2-by-2 c symmetric matrix. -c dlaev2 LAPACK routine that eigendecomposition of a 2-by-2 symmetric +c dlaev2 LAPACK routine that eigendecomposition of a 2-by-2 symmetric c matrix. c dlamch LAPACK routine that determines machine constants. c dlanst LAPACK routine that computes the norm of a matrix. @@ -72,7 +72,7 @@ c dlartg LAPACK Givens rotation construction routine. c dlascl LAPACK routine for careful scaling of a matrix. c dlaset LAPACK matrix initialization routine. -c dlasr LAPACK routine that applies an orthogonal transformation to +c dlasr LAPACK routine that applies an orthogonal transformation to c a matrix. c dlasrt LAPACK sorting routine. c dsteqr LAPACK routine that computes eigenvalues and eigenvectors @@ -84,19 +84,19 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: stqrb.F SID: 2.5 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks c 1. Starting with version 2.5, this routine is a modified version c of LAPACK version 2.0 subroutine SSTEQR. No lines are deleted, -c only commeted out and new lines inserted. +c only commented out and new lines inserted. c All lines commented out have "c$$$" at the beginning. c Note that the LAPACK version 1.0 subroutine SSTEQR contained -c bugs. +c bugs. c c\EndLib c @@ -118,9 +118,9 @@ subroutine dstqrb ( n, d, e, z, work, info ) & d( n ), e( n-1 ), z( n ), work( 2*n-2 ) c c .. parameters .. - Double precision + Double precision & zero, one, two, three - parameter ( zero = 0.0D+0, one = 1.0D+0, + parameter ( zero = 0.0D+0, one = 1.0D+0, & two = 2.0D+0, three = 3.0D+0 ) integer maxit parameter ( maxit = 30 ) @@ -129,7 +129,7 @@ subroutine dstqrb ( n, d, e, z, work, info ) integer i, icompz, ii, iscale, j, jtot, k, l, l1, lend, & lendm1, lendp1, lendsv, lm1, lsv, m, mm, mm1, & nm1, nmaxit - Double precision + Double precision & anorm, b, c, eps, eps2, f, g, p, r, rt1, rt2, & s, safmax, safmin, ssfmax, ssfmin, tst c .. @@ -380,9 +380,9 @@ subroutine dstqrb ( n, d, e, z, work, info ) c c *** New starting with version 2.5 *** c - call dlasr( 'r', 'v', 'b', 1, mm, work( l ), + call dlasr( 'r', 'v', 'b', 1, mm, work( l ), & work( n-1+l ), z( l ), 1 ) -c ************************************* +c ************************************* end if c d( l ) = d( l ) - p @@ -440,7 +440,7 @@ subroutine dstqrb ( n, d, e, z, work, info ) tst = z(l) z(l) = c*tst - s*z(l-1) z(l-1) = s*tst + c*z(l-1) -c ************************************* +c ************************************* else call dlae2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2 ) end if @@ -502,7 +502,7 @@ subroutine dstqrb ( n, d, e, z, work, info ) c call dlasr( 'r', 'v', 'f', 1, mm, work( m ), work( n-1+m ), & z( m ), 1 ) -c ************************************* +c ************************************* end if c d( l ) = d( l ) - p diff --git a/Toolbox/arpack-src/icnteq.f b/Toolbox/arpack-src/icnteq.f deleted file mode 100644 index dc345f9ba..000000000 --- a/Toolbox/arpack-src/icnteq.f +++ /dev/null @@ -1,18 +0,0 @@ -c -c----------------------------------------------------------------------- -c -c Count the number of elements equal to a specified integer value. -c - integer function icnteq (n, array, value) -c - integer n, value - integer array(*) -c - k = 0 - do 10 i = 1, n - if (array(i) .eq. value) k = k + 1 - 10 continue - icnteq = k -c - return - end diff --git a/Toolbox/arpack-src/icopy.f b/Toolbox/arpack-src/icopy.f deleted file mode 100644 index f9e8c1100..000000000 --- a/Toolbox/arpack-src/icopy.f +++ /dev/null @@ -1,77 +0,0 @@ -*-------------------------------------------------------------------- -*\Documentation -* -*\Name: ICOPY -* -*\Description: -* ICOPY copies an integer vector lx to an integer vector ly. -* -*\Usage: -* call icopy ( n, lx, inc, ly, incy ) -* -*\Arguments: -* n integer (input) -* On entry, n is the number of elements of lx to be -c copied to ly. -* -* lx integer array (input) -* On entry, lx is the integer vector to be copied. -* -* incx integer (input) -* On entry, incx is the increment between elements of lx. -* -* ly integer array (input) -* On exit, ly is the integer vector that contains the -* copy of lx. -* -* incy integer (input) -* On entry, incy is the increment between elements of ly. -* -*\Enddoc -* -*-------------------------------------------------------------------- -* - subroutine icopy( n, lx, incx, ly, incy ) -* -* ---------------------------- -* Specifications for arguments -* ---------------------------- - integer incx, incy, n - integer lx( 1 ), ly( 1 ) -* -* ---------------------------------- -* Specifications for local variables -* ---------------------------------- - integer i, ix, iy -* -* -------------------------- -* First executable statement -* -------------------------- - if( n.le.0 ) - $ return - if( incx.eq.1 .and. incy.eq.1 ) - $ go to 20 -c -c.....code for unequal increments or equal increments -c not equal to 1 - ix = 1 - iy = 1 - if( incx.lt.0 ) - $ ix = ( -n+1 )*incx + 1 - if( incy.lt.0 ) - $ iy = ( -n+1 )*incy + 1 - do 10 i = 1, n - ly( iy ) = lx( ix ) - ix = ix + incx - iy = iy + incy - 10 continue - return -c -c.....code for both increments equal to 1 -c - 20 continue - do 30 i = 1, n - ly( i ) = lx( i ) - 30 continue - return - end diff --git a/Toolbox/arpack-src/iset.f b/Toolbox/arpack-src/iset.f deleted file mode 100644 index cb690bc3e..000000000 --- a/Toolbox/arpack-src/iset.f +++ /dev/null @@ -1,16 +0,0 @@ -c -c----------------------------------------------------------------------- -c -c Only work with increment equal to 1 right now. -c - subroutine iset (n, value, array, inc) -c - integer n, value, inc - integer array(*) -c - do 10 i = 1, n - array(i) = value - 10 continue -c - return - end diff --git a/Toolbox/arpack-src/iswap.f b/Toolbox/arpack-src/iswap.f deleted file mode 100644 index 088798d00..000000000 --- a/Toolbox/arpack-src/iswap.f +++ /dev/null @@ -1,55 +0,0 @@ - subroutine iswap (n,sx,incx,sy,incy) -c -c interchanges two vectors. -c uses unrolled loops for increments equal to 1. -c jack dongarra, linpack, 3/11/78. -c - integer sx(1),sy(1),stemp - integer i,incx,incy,ix,iy,m,mp1,n -c - if(n.le.0)return - if(incx.eq.1.and.incy.eq.1)go to 20 -c -c code for unequal increments or equal increments not equal -c to 1 -c - ix = 1 - iy = 1 - if(incx.lt.0)ix = (-n+1)*incx + 1 - if(incy.lt.0)iy = (-n+1)*incy + 1 - do 10 i = 1,n - stemp = sx(ix) - sx(ix) = sy(iy) - sy(iy) = stemp - ix = ix + incx - iy = iy + incy - 10 continue - return -c -c code for both increments equal to 1 -c -c -c clean-up loop -c - 20 m = mod(n,3) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - stemp = sx(i) - sx(i) = sy(i) - sy(i) = stemp - 30 continue - if( n .lt. 3 ) return - 40 mp1 = m + 1 - do 50 i = mp1,n,3 - stemp = sx(i) - sx(i) = sy(i) - sy(i) = stemp - stemp = sx(i + 1) - sx(i + 1) = sy(i + 1) - sy(i + 1) = stemp - stemp = sx(i + 2) - sx(i + 2) = sy(i + 2) - sy(i + 2) = stemp - 50 continue - return - end diff --git a/Toolbox/arpack-src/second.f b/Toolbox/arpack-src/second.f deleted file mode 100644 index cdb3b6e2b..000000000 --- a/Toolbox/arpack-src/second.f +++ /dev/null @@ -1,5 +0,0 @@ - SUBROUTINE SECOND(T) - REAL T - T=0D0 - RETURN - END \ No newline at end of file diff --git a/Toolbox/arpack-src/second_NONE.f b/Toolbox/arpack-src/second_NONE.f new file mode 100644 index 000000000..01fcc9dcf --- /dev/null +++ b/Toolbox/arpack-src/second_NONE.f @@ -0,0 +1,36 @@ + SUBROUTINE ARSCND( T ) +* + REAL T +* +* -- LAPACK auxiliary routine (preliminary version) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* July 26, 1991 +* +* Purpose +* ======= +* +* SECOND returns the user time for a process in arscnds. +* This version gets the time from the system function ETIME. +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. External Functions .. + REAL ETIME + EXTERNAL ETIME +* .. +* .. Executable Statements .. +* + +c T1 = ETIME( TARRAY ) +c T = TARRAY( 1 ) + T = 0 + RETURN +* +* End of ARSCND +* + END diff --git a/Toolbox/arpack-src/sgetv0.f b/Toolbox/arpack-src/sgetv0.f index 331251075..26130a014 100644 --- a/Toolbox/arpack-src/sgetv0.f +++ b/Toolbox/arpack-src/sgetv0.f @@ -3,13 +3,13 @@ c c\Name: sgetv0 c -c\Description: +c\Description: c Generate a random initial residual vector for the Arnoldi process. -c Force the residual vector to be in the range of the operator OP. +c Force the residual vector to be in the range of the operator OP. c c\Usage: c call sgetv0 -c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, +c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, c IPNTR, WORKD, IERR ) c c\Arguments @@ -36,7 +36,7 @@ c B = 'G' -> generalized eigenvalue problem A*x = lambda*B*x c c ITRY Integer. (INPUT) -c ITRY counts the number of times that sgetv0 is called. +c ITRY counts the number of times that sgetv0 is called. c It should be set to 1 on the initial call to sgetv0. c c INITV Logical variable. (INPUT) @@ -55,11 +55,11 @@ c if this is a "restart". c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c RESID Real array of length N. (INPUT/OUTPUT) -c Initial residual vector to be generated. If RESID is +c Initial residual vector to be generated. If RESID is c provided, force RESID into the range of the operator OP. c c RNORM Real scalar. (OUTPUT) @@ -88,17 +88,17 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c svout ARPACK utility routine for vector output. c slarnv LAPACK routine for generating a random vector. c sgemv Level 2 BLAS routine for matrix vector multiplication. c scopy Level 1 BLAS that copies one vector to another. -c sdot Level 1 BLAS that computes the scalar product of two vectors. +c sdot Level 1 BLAS that computes the scalar product of two vectors. c snrm2 Level 1 BLAS that computes the norm of a vector. c c\Author @@ -106,26 +106,26 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: getv0.F SID: 2.7 DATE OF SID: 04/07/99 RELEASE: 2 c c\EndLib c c----------------------------------------------------------------------- c - subroutine sgetv0 - & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, + subroutine sgetv0 + & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, & ipntr, workd, ierr ) -c +c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -157,17 +157,17 @@ subroutine sgetv0 c | Local Scalars & Arrays | c %------------------------% c - logical first, inits, orth + logical first, orth integer idist, iseed(4), iter, msglvl, jj Real & rnorm0 - save first, iseed, inits, iter, msglvl, orth, rnorm0 + save first, iseed, iter, msglvl, orth, rnorm0 c c %----------------------% c | External Subroutines | c %----------------------% c - external slarnv, svout, scopy, sgemv, second + external slarnv, svout, scopy, sgemv, arscnd c c %--------------------% c | External Functions | @@ -183,12 +183,6 @@ subroutine sgetv0 c intrinsic abs, sqrt c -c %-----------------% -c | Data Statements | -c %-----------------% -c - data inits /.true./ -c c %-----------------------% c | Executable Statements | c %-----------------------% @@ -199,24 +193,21 @@ subroutine sgetv0 c | random number generator | c %-----------------------------------% c - if (inits) then - iseed(1) = 1 - iseed(2) = 3 - iseed(3) = 5 - iseed(4) = 7 - inits = .false. - end if + iseed(1) = 1 + iseed(2) = 3 + iseed(3) = 5 + iseed(4) = 7 c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mgetv0 -c +c ierr = 0 iter = 0 first = .FALSE. @@ -235,23 +226,25 @@ subroutine sgetv0 idist = 2 call slarnv (idist, iseed, n, resid) end if -c +c c %----------------------------------------------------------% c | Force the starting vector into the range of OP to handle | c | the generalized problem when B is possibly (singular). | c %----------------------------------------------------------% c - call second (t2) - if (bmat .eq. 'G') then + call arscnd (t2) + if (itry .eq. 1) then nopx = nopx + 1 ipntr(1) = 1 ipntr(2) = n + 1 call scopy (n, resid, 1, workd, 1) ido = -1 go to 9000 + else if (itry .gt. 1 .and. bmat .eq. 'G') then + call scopy (n, resid, 1, workd(n + 1), 1) end if end if -c +c c %-----------------------------------------% c | Back from computing OP*(initial-vector) | c %-----------------------------------------% @@ -259,26 +252,26 @@ subroutine sgetv0 if (first) go to 20 c c %-----------------------------------------------% -c | Back from computing B*(orthogonalized-vector) | +c | Back from computing OP*(orthogonalized-vector) | c %-----------------------------------------------% c if (orth) go to 40 -c +c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) end if -c +c c %------------------------------------------------------% c | Starting vector is now in the range of OP; r = OP*r; | c | Compute B-norm of starting vector. | c %------------------------------------------------------% c - call second (t2) + call arscnd (t2) first = .TRUE. + if (itry .eq. 1) call scopy (n, workd(n + 1), 1, resid, 1) if (bmat .eq. 'G') then nbx = nbx + 1 - call scopy (n, workd(n+1), 1, resid, 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 @@ -286,14 +279,14 @@ subroutine sgetv0 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd, 1) end if -c +c 20 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c first = .FALSE. if (bmat .eq. 'G') then rnorm0 = sdot (n, resid, 1, workd, 1) @@ -308,7 +301,7 @@ subroutine sgetv0 c %---------------------------------------------% c if (j .eq. 1) go to 50 -c +c c %---------------------------------------------------------------- c | Otherwise need to B-orthogonalize the starting vector against | c | the current Arnoldi basis using Gram-Schmidt with iter. ref. | @@ -324,16 +317,16 @@ subroutine sgetv0 orth = .TRUE. 30 continue c - call sgemv ('T', n, j-1, one, v, ldv, workd, 1, + call sgemv ('T', n, j-1, one, v, ldv, workd, 1, & zero, workd(n+1), 1) - call sgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, + call sgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, & one, resid, 1) -c +c c %----------------------------------------------------------% c | Compute the B-norm of the orthogonalized starting vector | c %----------------------------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call scopy (n, resid, 1, workd(n+1), 1) @@ -344,14 +337,14 @@ subroutine sgetv0 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd, 1) end if -c +c 40 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c if (bmat .eq. 'G') then rnorm = sdot (n, resid, 1, workd, 1) rnorm = sqrt(abs(rnorm)) @@ -364,14 +357,14 @@ subroutine sgetv0 c %--------------------------------------% c if (msglvl .gt. 2) then - call svout (logfil, 1, rnorm0, ndigit, + call svout (logfil, 1, [rnorm0], ndigit, & '_getv0: re-orthonalization ; rnorm0 is') - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_getv0: re-orthonalization ; rnorm is') end if c if (rnorm .gt. 0.717*rnorm0) go to 50 -c +c iter = iter + 1 if (iter .le. 5) then c @@ -393,11 +386,11 @@ subroutine sgetv0 rnorm = zero ierr = -1 end if -c +c 50 continue c if (msglvl .gt. 0) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_getv0: B-norm of initial / restarted starting vector') end if if (msglvl .gt. 3) then @@ -405,10 +398,10 @@ subroutine sgetv0 & '_getv0: initial / restarted starting vector') end if ido = 99 -c - call second (t1) +c + call arscnd (t1) tgetv0 = tgetv0 + (t1 - t0) -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/slaqrb.f b/Toolbox/arpack-src/slaqrb.f deleted file mode 100644 index e967b18e4..000000000 --- a/Toolbox/arpack-src/slaqrb.f +++ /dev/null @@ -1,521 +0,0 @@ -c----------------------------------------------------------------------- -c\BeginDoc -c -c\Name: slaqrb -c -c\Description: -c Compute the eigenvalues and the Schur decomposition of an upper -c Hessenberg submatrix in rows and columns ILO to IHI. Only the -c last component of the Schur vectors are computed. -c -c This is mostly a modification of the LAPACK routine slahqr. -c -c\Usage: -c call slaqrb -c ( WANTT, N, ILO, IHI, H, LDH, WR, WI, Z, INFO ) -c -c\Arguments -c WANTT Logical variable. (INPUT) -c = .TRUE. : the full Schur form T is required; -c = .FALSE.: only eigenvalues are required. -c -c N Integer. (INPUT) -c The order of the matrix H. N >= 0. -c -c ILO Integer. (INPUT) -c IHI Integer. (INPUT) -c It is assumed that H is already upper quasi-triangular in -c rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless -c ILO = 1). SLAQRB works primarily with the Hessenberg -c submatrix in rows and columns ILO to IHI, but applies -c transformations to all of H if WANTT is .TRUE.. -c 1 <= ILO <= max(1,IHI); IHI <= N. -c -c H Real array, dimension (LDH,N). (INPUT/OUTPUT) -c On entry, the upper Hessenberg matrix H. -c On exit, if WANTT is .TRUE., H is upper quasi-triangular in -c rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in -c standard form. If WANTT is .FALSE., the contents of H are -c unspecified on exit. -c -c LDH Integer. (INPUT) -c The leading dimension of the array H. LDH >= max(1,N). -c -c WR Real array, dimension (N). (OUTPUT) -c WI Real array, dimension (N). (OUTPUT) -c The real and imaginary parts, respectively, of the computed -c eigenvalues ILO to IHI are stored in the corresponding -c elements of WR and WI. If two eigenvalues are computed as a -c complex conjugate pair, they are stored in consecutive -c elements of WR and WI, say the i-th and (i+1)th, with -c WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the -c eigenvalues are stored in the same order as on the diagonal -c of the Schur form returned in H, with WR(i) = H(i,i), and, if -c H(i:i+1,i:i+1) is a 2-by-2 diagonal block, -c WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). -c -c Z Real array, dimension (N). (OUTPUT) -c On exit Z contains the last components of the Schur vectors. -c -c INFO Integer. (OUPUT) -c = 0: successful exit -c > 0: SLAQRB failed to compute all the eigenvalues ILO to IHI -c in a total of 30*(IHI-ILO+1) iterations; if INFO = i, -c elements i+1:ihi of WR and WI contain those eigenvalues -c which have been successfully computed. -c -c\Remarks -c 1. None. -c -c----------------------------------------------------------------------- -c -c\BeginLib -c -c\Local variables: -c xxxxxx real -c -c\Routines called: -c slabad LAPACK routine that computes machine constants. -c slamch LAPACK routine that determines machine constants. -c slanhs LAPACK routine that computes various norms of a matrix. -c slanv2 LAPACK routine that computes the Schur factorization of -c 2 by 2 nonsymmetric matrix in standard form. -c slarfg LAPACK Householder reflection construction routine. -c scopy Level 1 BLAS that copies one vector to another. -c srot Level 1 BLAS that applies a rotation to a 2 by 2 matrix. - -c -c\Author -c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas -c -c\Revision history: -c xx/xx/92: Version ' 2.4' -c Modified from the LAPACK routine slahqr so that only the -c last component of the Schur vectors are computed. -c -c\SCCS Information: @(#) -c FILE: laqrb.F SID: 2.2 DATE OF SID: 8/27/96 RELEASE: 2 -c -c\Remarks -c 1. None -c -c\EndLib -c -c----------------------------------------------------------------------- -c - subroutine slaqrb ( wantt, n, ilo, ihi, h, ldh, wr, wi, - & z, info ) -c -c %------------------% -c | Scalar Arguments | -c %------------------% -c - logical wantt - integer ihi, ilo, info, ldh, n -c -c %-----------------% -c | Array Arguments | -c %-----------------% -c - Real - & h( ldh, * ), wi( * ), wr( * ), z( * ) -c -c %------------% -c | Parameters | -c %------------% -c - Real - & zero, one, dat1, dat2 - parameter (zero = 0.0E+0, one = 1.0E+0, dat1 = 7.5E-1, - & dat2 = -4.375E-1) -c -c %------------------------% -c | Local Scalars & Arrays | -c %------------------------% -c - integer i, i1, i2, itn, its, j, k, l, m, nh, nr - Real - & cs, h00, h10, h11, h12, h21, h22, h33, h33s, - & h43h34, h44, h44s, ovfl, s, smlnum, sn, sum, - & t1, t2, t3, tst1, ulp, unfl, v1, v2, v3 - Real - & v( 3 ), work( 1 ) -c -c %--------------------% -c | External Functions | -c %--------------------% -c - Real - & slamch, slanhs - external slamch, slanhs -c -c %----------------------% -c | External Subroutines | -c %----------------------% -c - external scopy, slabad, slanv2, slarfg, srot -c -c %-----------------------% -c | Executable Statements | -c %-----------------------% -c - info = 0 -c -c %--------------------------% -c | Quick return if possible | -c %--------------------------% -c - if( n.eq.0 ) - & return - if( ilo.eq.ihi ) then - wr( ilo ) = h( ilo, ilo ) - wi( ilo ) = zero - return - end if -c -c %---------------------------------------------% -c | Initialize the vector of last components of | -c | the Schur vectors for accumulation. | -c %---------------------------------------------% -c - do 5 j = 1, n-1 - z(j) = zero - 5 continue - z(n) = one -c - nh = ihi - ilo + 1 -c -c %-------------------------------------------------------------% -c | Set machine-dependent constants for the stopping criterion. | -c | If norm(H) <= sqrt(OVFL), overflow should not occur. | -c %-------------------------------------------------------------% -c - unfl = slamch( 'safe minimum' ) - ovfl = one / unfl - call slabad( unfl, ovfl ) - ulp = slamch( 'precision' ) - smlnum = unfl*( nh / ulp ) -c -c %---------------------------------------------------------------% -c | I1 and I2 are the indices of the first row and last column | -c | of H to which transformations must be applied. If eigenvalues | -c | only are computed, I1 and I2 are set inside the main loop. | -c | Zero out H(J+2,J) = ZERO for J=1:N if WANTT = .TRUE. | -c | else H(J+2,J) for J=ILO:IHI-ILO-1 if WANTT = .FALSE. | -c %---------------------------------------------------------------% -c - if( wantt ) then - i1 = 1 - i2 = n - do 8 i=1,i2-2 - h(i1+i+1,i) = zero - 8 continue - else - do 9 i=1, ihi-ilo-1 - h(ilo+i+1,ilo+i-1) = zero - 9 continue - end if -c -c %---------------------------------------------------% -c | ITN is the total number of QR iterations allowed. | -c %---------------------------------------------------% -c - itn = 30*nh -c -c ------------------------------------------------------------------ -c The main loop begins here. I is the loop index and decreases from -c IHI to ILO in steps of 1 or 2. Each iteration of the loop works -c with the active submatrix in rows and columns L to I. -c Eigenvalues I+1 to IHI have already converged. Either L = ILO or -c H(L,L-1) is negligible so that the matrix splits. -c ------------------------------------------------------------------ -c - i = ihi - 10 continue - l = ilo - if( i.lt.ilo ) - & go to 150 - -c %--------------------------------------------------------------% -c | Perform QR iterations on rows and columns ILO to I until a | -c | submatrix of order 1 or 2 splits off at the bottom because a | -c | subdiagonal element has become negligible. | -c %--------------------------------------------------------------% - - do 130 its = 0, itn -c -c %----------------------------------------------% -c | Look for a single small subdiagonal element. | -c %----------------------------------------------% -c - do 20 k = i, l + 1, -1 - tst1 = abs( h( k-1, k-1 ) ) + abs( h( k, k ) ) - if( tst1.eq.zero ) - & tst1 = slanhs( '1', i-l+1, h( l, l ), ldh, work ) - if( abs( h( k, k-1 ) ).le.max( ulp*tst1, smlnum ) ) - & go to 30 - 20 continue - 30 continue - l = k - if( l.gt.ilo ) then -c -c %------------------------% -c | H(L,L-1) is negligible | -c %------------------------% -c - h( l, l-1 ) = zero - end if -c -c %-------------------------------------------------------------% -c | Exit from loop if a submatrix of order 1 or 2 has split off | -c %-------------------------------------------------------------% -c - if( l.ge.i-1 ) - & go to 140 -c -c %---------------------------------------------------------% -c | Now the active submatrix is in rows and columns L to I. | -c | If eigenvalues only are being computed, only the active | -c | submatrix need be transformed. | -c %---------------------------------------------------------% -c - if( .not.wantt ) then - i1 = l - i2 = i - end if -c - if( its.eq.10 .or. its.eq.20 ) then -c -c %-------------------% -c | Exceptional shift | -c %-------------------% -c - s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) - h44 = dat1*s - h33 = h44 - h43h34 = dat2*s*s -c - else -c -c %-----------------------------------------% -c | Prepare to use Wilkinson's double shift | -c %-----------------------------------------% -c - h44 = h( i, i ) - h33 = h( i-1, i-1 ) - h43h34 = h( i, i-1 )*h( i-1, i ) - end if -c -c %-----------------------------------------------------% -c | Look for two consecutive small subdiagonal elements | -c %-----------------------------------------------------% -c - do 40 m = i - 2, l, -1 -c -c %---------------------------------------------------------% -c | Determine the effect of starting the double-shift QR | -c | iteration at row M, and see if this would make H(M,M-1) | -c | negligible. | -c %---------------------------------------------------------% -c - h11 = h( m, m ) - h22 = h( m+1, m+1 ) - h21 = h( m+1, m ) - h12 = h( m, m+1 ) - h44s = h44 - h11 - h33s = h33 - h11 - v1 = ( h33s*h44s-h43h34 ) / h21 + h12 - v2 = h22 - h11 - h33s - h44s - v3 = h( m+2, m+1 ) - s = abs( v1 ) + abs( v2 ) + abs( v3 ) - v1 = v1 / s - v2 = v2 / s - v3 = v3 / s - v( 1 ) = v1 - v( 2 ) = v2 - v( 3 ) = v3 - if( m.eq.l ) - & go to 50 - h00 = h( m-1, m-1 ) - h10 = h( m, m-1 ) - tst1 = abs( v1 )*( abs( h00 )+abs( h11 )+abs( h22 ) ) - if( abs( h10 )*( abs( v2 )+abs( v3 ) ).le.ulp*tst1 ) - & go to 50 - 40 continue - 50 continue -c -c %----------------------% -c | Double-shift QR step | -c %----------------------% -c - do 120 k = m, i - 1 -c -c ------------------------------------------------------------ -c The first iteration of this loop determines a reflection G -c from the vector V and applies it from left and right to H, -c thus creating a nonzero bulge below the subdiagonal. -c -c Each subsequent iteration determines a reflection G to -c restore the Hessenberg form in the (K-1)th column, and thus -c chases the bulge one step toward the bottom of the active -c submatrix. NR is the order of G. -c ------------------------------------------------------------ -c - nr = min( 3, i-k+1 ) - if( k.gt.m ) - & call scopy( nr, h( k, k-1 ), 1, v, 1 ) - call slarfg( nr, v( 1 ), v( 2 ), 1, t1 ) - if( k.gt.m ) then - h( k, k-1 ) = v( 1 ) - h( k+1, k-1 ) = zero - if( k.lt.i-1 ) - & h( k+2, k-1 ) = zero - else if( m.gt.l ) then - h( k, k-1 ) = -h( k, k-1 ) - end if - v2 = v( 2 ) - t2 = t1*v2 - if( nr.eq.3 ) then - v3 = v( 3 ) - t3 = t1*v3 -c -c %------------------------------------------------% -c | Apply G from the left to transform the rows of | -c | the matrix in columns K to I2. | -c %------------------------------------------------% -c - do 60 j = k, i2 - sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) - h( k, j ) = h( k, j ) - sum*t1 - h( k+1, j ) = h( k+1, j ) - sum*t2 - h( k+2, j ) = h( k+2, j ) - sum*t3 - 60 continue -c -c %----------------------------------------------------% -c | Apply G from the right to transform the columns of | -c | the matrix in rows I1 to min(K+3,I). | -c %----------------------------------------------------% -c - do 70 j = i1, min( k+3, i ) - sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) - h( j, k ) = h( j, k ) - sum*t1 - h( j, k+1 ) = h( j, k+1 ) - sum*t2 - h( j, k+2 ) = h( j, k+2 ) - sum*t3 - 70 continue -c -c %----------------------------------% -c | Accumulate transformations for Z | -c %----------------------------------% -c - sum = z( k ) + v2*z( k+1 ) + v3*z( k+2 ) - z( k ) = z( k ) - sum*t1 - z( k+1 ) = z( k+1 ) - sum*t2 - z( k+2 ) = z( k+2 ) - sum*t3 - - else if( nr.eq.2 ) then -c -c %------------------------------------------------% -c | Apply G from the left to transform the rows of | -c | the matrix in columns K to I2. | -c %------------------------------------------------% -c - do 90 j = k, i2 - sum = h( k, j ) + v2*h( k+1, j ) - h( k, j ) = h( k, j ) - sum*t1 - h( k+1, j ) = h( k+1, j ) - sum*t2 - 90 continue -c -c %----------------------------------------------------% -c | Apply G from the right to transform the columns of | -c | the matrix in rows I1 to min(K+3,I). | -c %----------------------------------------------------% -c - do 100 j = i1, i - sum = h( j, k ) + v2*h( j, k+1 ) - h( j, k ) = h( j, k ) - sum*t1 - h( j, k+1 ) = h( j, k+1 ) - sum*t2 - 100 continue -c -c %----------------------------------% -c | Accumulate transformations for Z | -c %----------------------------------% -c - sum = z( k ) + v2*z( k+1 ) - z( k ) = z( k ) - sum*t1 - z( k+1 ) = z( k+1 ) - sum*t2 - end if - 120 continue - - 130 continue -c -c %-------------------------------------------------------% -c | Failure to converge in remaining number of iterations | -c %-------------------------------------------------------% -c - info = i - return - - 140 continue - - if( l.eq.i ) then -c -c %------------------------------------------------------% -c | H(I,I-1) is negligible: one eigenvalue has converged | -c %------------------------------------------------------% -c - wr( i ) = h( i, i ) - wi( i ) = zero - - else if( l.eq.i-1 ) then -c -c %--------------------------------------------------------% -c | H(I-1,I-2) is negligible; | -c | a pair of eigenvalues have converged. | -c | | -c | Transform the 2-by-2 submatrix to standard Schur form, | -c | and compute and store the eigenvalues. | -c %--------------------------------------------------------% -c - call slanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ), - & h( i, i ), wr( i-1 ), wi( i-1 ), wr( i ), wi( i ), - & cs, sn ) - - if( wantt ) then -c -c %-----------------------------------------------------% -c | Apply the transformation to the rest of H and to Z, | -c | as required. | -c %-----------------------------------------------------% -c - if( i2.gt.i ) - & call srot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh, - & cs, sn ) - call srot( i-i1-1, h( i1, i-1 ), 1, h( i1, i ), 1, cs, sn ) - sum = cs*z( i-1 ) + sn*z( i ) - z( i ) = cs*z( i ) - sn*z( i-1 ) - z( i-1 ) = sum - end if - end if -c -c %---------------------------------------------------------% -c | Decrement number of remaining iterations, and return to | -c | start of the main loop with new value of I. | -c %---------------------------------------------------------% -c - itn = itn - its - i = l - 1 - go to 10 - - 150 continue - return -c -c %---------------% -c | End of slaqrb | -c %---------------% -c - end diff --git a/Toolbox/arpack-src/snaitr.f b/Toolbox/arpack-src/snaitr.f index 6a2be3a05..8a5d795be 100644 --- a/Toolbox/arpack-src/snaitr.f +++ b/Toolbox/arpack-src/snaitr.f @@ -3,8 +3,8 @@ c c\Name: snaitr c -c\Description: -c Reverse communication interface for applying NP additional steps to +c\Description: +c Reverse communication interface for applying NP additional steps to c a K step nonsymmetric Arnoldi factorization. c c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T @@ -20,7 +20,7 @@ c c\Usage: c call snaitr -c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, +c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, c IPNTR, WORKD, INFO ) c c\Arguments @@ -62,8 +62,8 @@ c Number of additional Arnoldi steps to take. c c NB Integer. (INPUT) -c Blocksize to be used in the recurrence. -c Only work for NB = 1 right now. The goal is to have a +c Blocksize to be used in the recurrence. +c Only work for NB = 1 right now. The goal is to have a c program that implement both the block and non-block method. c c RESID Real array of length N. (INPUT/OUTPUT) @@ -75,37 +75,37 @@ c B-norm of the updated residual r_{k+p} on output. c c V Real N by K+NP array. (INPUT/OUTPUT) -c On INPUT: V contains the Arnoldi vectors in the first K +c On INPUT: V contains the Arnoldi vectors in the first K c columns. c On OUTPUT: V contains the new NP Arnoldi vectors in the next c NP columns. The first K columns are unchanged. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Real (K+NP) by (K+NP) array. (INPUT/OUTPUT) c H is used to store the generated upper Hessenberg matrix. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORK for +c Pointer to mark the starting locations in the WORK for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Real work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The calling program should not +c for reverse communication. The calling program should not c use WORKD as temporary workspace during the iteration !!!!!! -c On input, WORKD(1:N) = B*RESID and is used to save some +c On input, WORKD(1:N) = B*RESID and is used to save some c computation at the first step. c c INFO Integer. (OUTPUT) @@ -125,14 +125,14 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: c sgetv0 ARPACK routine to generate the initial vector. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c smout ARPACK utility routine that prints matrices c svout ARPACK utility routine that prints vectors. c slabad LAPACK routine that computes machine constants. @@ -143,7 +143,7 @@ c saxpy Level 1 BLAS that computes a vector triad. c sscal Level 1 BLAS that scales a vector. c scopy Level 1 BLAS that copies one vector to another . -c sdot Level 1 BLAS that computes the scalar product of two vectors. +c sdot Level 1 BLAS that computes the scalar product of two vectors. c snrm2 Level 1 BLAS that computes the norm of a vector. c c\Author @@ -151,22 +151,22 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c xx/xx/92: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: naitr.F SID: 2.4 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks c The algorithm implemented is: -c +c c restart = .false. -c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; +c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; c r_{k} contains the initial residual vector even for k = 0; -c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already +c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already c computed by the calling program. c c betaj = rnorm ; p_{k+1} = B*r_{k} ; @@ -174,7 +174,7 @@ c 1) if ( betaj < tol ) stop or restart depending on j. c ( At present tol is zero ) c if ( restart ) generate a new starting vector. -c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; +c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; c p_{j} = p_{j}/betaj c 3) r_{j} = OP*v_{j} where OP is defined as in snaupd c For shift-invert mode p_{j} = B*v_{j} is already available. @@ -189,7 +189,7 @@ c 5) Re-orthogonalization step: c s = V_{j}'*B*r_{j} c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} || -c alphaj = alphaj + s_{j}; +c alphaj = alphaj + s_{j}; c 6) Iterative refinement step: c If (rnorm1 > 0.717*rnorm) then c rnorm = rnorm1 @@ -199,7 +199,7 @@ c If this is the first time in step 6), go to 5) c Else r_{j} lies in the span of V_{j} numerically. c Set r_{j} = 0 and rnorm = 0; go to 1) -c EndIf +c EndIf c End Do c c\EndLib @@ -207,15 +207,15 @@ c----------------------------------------------------------------------- c subroutine snaitr - & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, + & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, & ipntr, workd, info) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -250,14 +250,14 @@ subroutine snaitr integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl, & jj Real - & betaj, ovfl, temp1, rnorm1, smlnum, tst1, ulp, unfl, + & betaj, ovfl, temp1, rnorm1, smlnum, tst1, ulp, unfl, & wnorm save first, orth1, orth2, rstart, step3, step4, & ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl, & betaj, rnorm1, smlnum, ulp, unfl, wnorm c c %-----------------------% -c | Local Array Arguments | +c | Local Array Arguments | c %-----------------------% c Real @@ -267,8 +267,8 @@ subroutine snaitr c | External Subroutines | c %----------------------% c - external saxpy, scopy, sscal, sgemv, sgetv0, slabad, - & svout, smout, ivout, second + external saxpy, scopy, sscal, sgemv, sgetv0, slabad, + & svout, smout, ivout, arscnd c c %--------------------% c | External Functions | @@ -313,15 +313,15 @@ subroutine snaitr end if c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mnaitr -c +c c %------------------------------% c | Initial call to this routine | c %------------------------------% @@ -337,7 +337,7 @@ subroutine snaitr irj = ipj + n ivj = irj + n end if -c +c c %-------------------------------------------------% c | When in reverse communication mode one of: | c | STEP3, STEP4, ORTH1, ORTH2, RSTART | @@ -367,19 +367,19 @@ subroutine snaitr c | | c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | c %--------------------------------------------------------------% - + 1000 continue c if (msglvl .gt. 1) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: generating Arnoldi vector number') - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_naitr: B-norm of the current residual is') end if -c +c c %---------------------------------------------------% c | STEP 1: Check if the B norm of j-th residual | -c | vector is zero. Equivalent to determing whether | +c | vector is zero. Equivalent to determining whether | c | an exact j-step Arnoldi factorization is present. | c %---------------------------------------------------% c @@ -393,16 +393,16 @@ subroutine snaitr c %---------------------------------------------------% c if (msglvl .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: ****** RESTART AT STEP ******') end if -c +c c %---------------------------------------------% c | ITRY is the loop variable that controls the | c | maximum amount of times that a restart is | c | attempted. NRSTRT is used by stat.h | c %---------------------------------------------% -c +c betaj = zero nrstrt = nrstrt + 1 itry = 1 @@ -416,7 +416,7 @@ subroutine snaitr c | RSTART = .true. flow returns here. | c %--------------------------------------% c - call sgetv0 (ido, bmat, itry, .false., n, j, v, ldv, + call sgetv0 (ido, bmat, itry, .false., n, j, v, ldv, & resid, rnorm, ipntr, workd, ierr) if (ido .ne. 99) go to 9000 if (ierr .lt. 0) then @@ -430,12 +430,12 @@ subroutine snaitr c %------------------------------------------------% c info = j - 1 - call second (t1) + call arscnd (t1) tnaitr = tnaitr + (t1 - t0) ido = 99 go to 9000 end if -c +c 40 continue c c %---------------------------------------------------------% @@ -457,9 +457,9 @@ subroutine snaitr c | use LAPACK routine SLASCL | c %-----------------------------------------% c - call slascl ('General', i, i, rnorm, one, n, 1, + call slascl ('General', i, i, rnorm, one, n, 1, & v(1,j), n, infol) - call slascl ('General', i, i, rnorm, one, n, 1, + call slascl ('General', i, i, rnorm, one, n, 1, & workd(ipj), n, infol) end if c @@ -470,29 +470,29 @@ subroutine snaitr c step3 = .true. nopx = nopx + 1 - call second (t2) + call arscnd (t2) call scopy (n, v(1,j), 1, workd(ivj), 1) ipntr(1) = ivj ipntr(2) = irj ipntr(3) = ipj ido = 1 -c +c c %-----------------------------------% c | Exit in order to compute OP*v_{j} | c %-----------------------------------% -c - go to 9000 +c + go to 9000 50 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | c | if step3 = .true. | c %----------------------------------% c - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) - + step3 = .false. c c %------------------------------------------% @@ -500,30 +500,30 @@ subroutine snaitr c %------------------------------------------% c call scopy (n, workd(irj), 1, resid, 1) -c +c c %---------------------------------------% c | STEP 4: Finish extending the Arnoldi | c | factorization to length j. | c %---------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 step4 = .true. ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-------------------------------------% c | Exit in order to compute B*OP*v_{j} | c %-------------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd(ipj), 1) end if 60 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | @@ -531,10 +531,10 @@ subroutine snaitr c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c step4 = .false. c c %-------------------------------------% @@ -542,7 +542,7 @@ subroutine snaitr c | Compute the B-norm of OP*v_{j}. | c %-------------------------------------% c - if (bmat .eq. 'G') then + if (bmat .eq. 'G') then wnorm = sdot (n, resid, 1, workd(ipj), 1) wnorm = sqrt(abs(wnorm)) else if (bmat .eq. 'I') then @@ -562,13 +562,13 @@ subroutine snaitr c | Compute the j Fourier coefficients w_{j} | c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | c %------------------------------------------% -c +c call sgemv ('T', n, j, one, v, ldv, workd(ipj), 1, & zero, h(1,j), 1) c c %--------------------------------------% c | Orthogonalize r_{j} against V_{j}. | -c | RESID contains OP*v_{j}. See STEP 3. | +c | RESID contains OP*v_{j}. See STEP 3. | c %--------------------------------------% c call sgemv ('N', n, j, -one, v, ldv, h(1,j), 1, @@ -576,51 +576,51 @@ subroutine snaitr c if (j .gt. 1) h(j,j-1) = betaj c - call second (t4) -c + call arscnd (t4) +c orth1 = .true. c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call scopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*r_{j} | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd(ipj), 1) - end if + end if 70 continue -c +c c %---------------------------------------------------% c | Back from reverse communication if ORTH1 = .true. | c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c orth1 = .false. c c %------------------------------% c | Compute the B-norm of r_{j}. | c %------------------------------% c - if (bmat .eq. 'G') then + if (bmat .eq. 'G') then rnorm = sdot (n, resid, 1, workd(ipj), 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then rnorm = snrm2(n, resid, 1) end if -c +c c %-----------------------------------------------------------% c | STEP 5: Re-orthogonalization / Iterative refinement phase | c | Maximum NITER_ITREF tries. | @@ -642,20 +642,20 @@ subroutine snaitr if (rnorm .gt. 0.717*wnorm) go to 100 iter = 0 nrorth = nrorth + 1 -c +c c %---------------------------------------------------% c | Enter the Iterative refinement phase. If further | c | refinement is necessary, loop back here. The loop | c | variable is ITER. Perform a step of Classical | c | Gram-Schmidt using all the Arnoldi vectors V_{j} | c %---------------------------------------------------% -c +c 80 continue c if (msglvl .gt. 2) then xtemp(1) = wnorm xtemp(2) = rnorm - call svout (logfil, 2, xtemp, ndigit, + call svout (logfil, 2, xtemp, ndigit, & '_naitr: re-orthonalization; wnorm and rnorm are') call svout (logfil, j, h(1,j), ndigit, & '_naitr: j-th column of H') @@ -666,7 +666,7 @@ subroutine snaitr c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | c %----------------------------------------------------% c - call sgemv ('T', n, j, one, v, ldv, workd(ipj), 1, + call sgemv ('T', n, j, one, v, ldv, workd(ipj), 1, & zero, workd(irj), 1) c c %---------------------------------------------% @@ -676,28 +676,28 @@ subroutine snaitr c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | c %---------------------------------------------% c - call sgemv ('N', n, j, -one, v, ldv, workd(irj), 1, + call sgemv ('N', n, j, -one, v, ldv, workd(irj), 1, & one, resid, 1) call saxpy (j, one, workd(irj), 1, h(1,j), 1) -c +c orth2 = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call scopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-----------------------------------% c | Exit in order to compute B*r_{j}. | c | r_{j} is the corrected residual. | c %-----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd(ipj), 1) - end if + end if 90 continue c c %---------------------------------------------------% @@ -705,15 +705,15 @@ subroutine snaitr c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if c c %-----------------------------------------------------% c | Compute the B-norm of the corrected residual r_{j}. | c %-----------------------------------------------------% -c - if (bmat .eq. 'G') then +c + if (bmat .eq. 'G') then rnorm1 = sdot (n, resid, 1, workd(ipj), 1) rnorm1 = sqrt(abs(rnorm1)) else if (bmat .eq. 'I') then @@ -721,7 +721,7 @@ subroutine snaitr end if c if (msglvl .gt. 0 .and. iter .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: Iterative refinement for Arnoldi residual') if (msglvl .gt. 2) then xtemp(1) = rnorm @@ -749,7 +749,7 @@ subroutine snaitr c %---------------------------------------% c rnorm = rnorm1 -c +c else c c %-------------------------------------------% @@ -771,50 +771,50 @@ subroutine snaitr 95 continue rnorm = zero end if -c +c c %----------------------------------------------% c | Branch here directly if iterative refinement | c | wasn't necessary or after at most NITER_REF | c | steps of iterative refinement. | c %----------------------------------------------% -c +c 100 continue -c +c rstart = .false. orth2 = .false. -c - call second (t5) +c + call arscnd (t5) titref = titref + (t5 - t4) -c +c c %------------------------------------% c | STEP 6: Update j = j+1; Continue | c %------------------------------------% c j = j + 1 if (j .gt. k+np) then - call second (t1) + call arscnd (t1) tnaitr = tnaitr + (t1 - t0) ido = 99 do 110 i = max(1,k), k+np-1 -c +c c %--------------------------------------------% c | Check for splitting and deflation. | c | Use a standard test as in the QR algorithm | c | REFERENCE: LAPACK subroutine slahqr | c %--------------------------------------------% -c +c tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) ) if( tst1.eq.zero ) & tst1 = slanhs( '1', k+np, h, ldh, workd(n+1) ) - if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) + if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 110 continue -c +c if (msglvl .gt. 2) then - call smout (logfil, k+np, k+np, h, ldh, ndigit, + call smout (logfil, k+np, k+np, h, ldh, ndigit, & '_naitr: Final upper Hessenberg matrix H of order K+NP') end if -c +c go to 9000 end if c @@ -823,7 +823,7 @@ subroutine snaitr c %--------------------------------------------------------% c go to 1000 -c +c c %---------------------------------------------------------------% c | | c | E N D O F M A I N I T E R A T I O N L O O P | diff --git a/Toolbox/arpack-src/snapps.f b/Toolbox/arpack-src/snapps.f index f7de4bda7..33b036108 100644 --- a/Toolbox/arpack-src/snapps.f +++ b/Toolbox/arpack-src/snapps.f @@ -13,14 +13,14 @@ c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q c c where Q is an orthogonal matrix which is the product of rotations -c and reflections resulting from the NP bulge chage sweeps. +c and reflections resulting from the NP bulge change sweeps. c The updated Arnoldi factorization becomes: c c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T. c c\Usage: c call snapps -c ( N, KEV, NP, SHIFTR, SHIFTI, V, LDV, H, LDH, RESID, Q, LDQ, +c ( N, KEV, NP, SHIFTR, SHIFTI, V, LDV, H, LDH, RESID, Q, LDQ, c WORKL, WORKD ) c c\Arguments @@ -29,8 +29,8 @@ c c KEV Integer. (INPUT/OUTPUT) c KEV+NP is the size of the input matrix H. -c KEV is the size of the updated matrix HNEW. KEV is only -c updated on ouput when fewer than NP shifts are applied in +c KEV is the size of the updated matrix HNEW. KEV is only +c updated on output when fewer than NP shifts are applied in c order to keep the conjugate pair together. c c NP Integer. (INPUT) @@ -38,7 +38,7 @@ c c SHIFTR, Real array of length NP. (INPUT) c SHIFTI Real and imaginary part of the shifts to be applied. -c Upon, entry to snapps, the shifts must be sorted so that the +c Upon, entry to snapps, the shifts must be sorted so that the c conjugate pairs are in consecutive locations. c c V Real N by (KEV+NP) array. (INPUT/OUTPUT) @@ -51,7 +51,7 @@ c program. c c H Real (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT) -c On INPUT, H contains the current KEV+NP by KEV+NP upper +c On INPUT, H contains the current KEV+NP by KEV+NP upper c Hessenber matrix of the Arnoldi factorization. c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg c matrix in the KEV leading submatrix. @@ -62,7 +62,7 @@ c c RESID Real array of length N. (INPUT/OUTPUT) c On INPUT, RESID contains the the residual vector r_{k+p}. -c On OUTPUT, RESID is the update residual vector rnew_{k} +c On OUTPUT, RESID is the update residual vector rnew_{k} c in the first KEV locations. c c Q Real KEV+NP by KEV+NP work array. (WORKSPACE) @@ -97,12 +97,12 @@ c c\Routines called: c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c smout ARPACK utility routine that prints matrices. c svout ARPACK utility routine that prints vectors. c slabad LAPACK routine that computes machine constants. c slacpy LAPACK matrix copy routine. -c slamch LAPACK routine that determines machine constants. +c slamch LAPACK routine that determines machine constants. c slanhs LAPACK routine that computes various norms of a matrix. c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c slarf LAPACK routine that applies Householder reflection to @@ -120,13 +120,13 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: napps.F SID: 2.4 DATE OF SID: 3/28/97 RELEASE: 2 c c\Remarks @@ -141,15 +141,15 @@ c----------------------------------------------------------------------- c subroutine snapps - & ( n, kev, np, shiftr, shifti, v, ldv, h, ldh, resid, q, ldq, + & ( n, kev, np, shiftr, shifti, v, ldv, h, ldh, resid, q, ldq, & workl, workd ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -162,7 +162,7 @@ subroutine snapps c %-----------------% c Real - & h(ldh,kev+np), resid(n), shifti(np), shiftr(np), + & h(ldh,kev+np), resid(n), shifti(np), shiftr(np), & v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np) c c %------------% @@ -180,16 +180,16 @@ subroutine snapps integer i, iend, ir, istart, j, jj, kplusp, msglvl, nr logical cconj, first Real - & c, f, g, h11, h12, h21, h22, h32, ovfl, r, s, sigmai, + & c, f, g, h11, h12, h21, h22, h32, ovfl, r, s, sigmai, & sigmar, smlnum, ulp, unfl, u(3), t, tau, tst1 - save first, ovfl, smlnum, ulp, unfl + save first, ovfl, smlnum, ulp, unfl c c %----------------------% c | External Subroutines | c %----------------------% c external saxpy, scopy, sscal, slacpy, slarfg, slarf, - & slaset, slabad, second, slartg + & slaset, slabad, arscnd, slartg c c %--------------------% c | External Functions | @@ -206,7 +206,7 @@ subroutine snapps intrinsic abs, max, min c c %----------------% -c | Data statments | +c | Data statements | c %----------------% c data first / .true. / @@ -237,10 +237,10 @@ subroutine snapps c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mnapps - kplusp = kev + np -c + kplusp = kev + np +c c %--------------------------------------------% c | Initialize Q to the identity to accumulate | c | the rotations and reflections | @@ -266,11 +266,11 @@ subroutine snapps sigmai = shifti(jj) c if (msglvl .gt. 2 ) then - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: shift number.') - call svout (logfil, 1, sigmar, ndigit, + call svout (logfil, 1, [sigmar], ndigit, & '_napps: The real part of the shift ') - call svout (logfil, 1, sigmai, ndigit, + call svout (logfil, 1, [sigmai], ndigit, & '_napps: The imaginary part of the shift ') end if c @@ -335,11 +335,11 @@ subroutine snapps & tst1 = slanhs( '1', kplusp-jj+1, h, ldh, workl ) if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_napps: matrix splitting at row/column no.') - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: matrix splitting with shift number.') - call svout (logfil, 1, h(i+1,i), ndigit, + call svout (logfil, 1, h(i+1,i), ndigit, & '_napps: off diagonal element.') end if iend = i @@ -351,9 +351,9 @@ subroutine snapps 40 continue c if (msglvl .gt. 2) then - call ivout (logfil, 1, istart, ndigit, + call ivout (logfil, 1, [istart], ndigit, & '_napps: Start of current block ') - call ivout (logfil, 1, iend, ndigit, + call ivout (logfil, 1, [iend], ndigit, & '_napps: End of current block ') end if c @@ -368,7 +368,7 @@ subroutine snapps c | complex conjugate pair of shifts on a 2 by 2 matrix. | c %------------------------------------------------------% c - if ( istart + 1 .eq. iend .and. abs( sigmai ) .gt. zero ) + if ( istart + 1 .eq. iend .and. abs( sigmai ) .gt. zero ) & go to 100 c h11 = h(istart,istart) @@ -381,11 +381,11 @@ subroutine snapps c f = h11 - sigmar g = h21 -c +c do 80 i = istart, iend-1 c c %-----------------------------------------------------% -c | Contruct the plane rotation G to zero out the bulge | +c | Construct the plane rotation G to zero out the bulge | c %-----------------------------------------------------% c call slartg (f, g, c, s, r) @@ -413,7 +413,7 @@ subroutine snapps do 50 j = i, kplusp t = c*h(i,j) + s*h(i+1,j) h(i+1,j) = -s*h(i,j) + c*h(i+1,j) - h(i,j) = t + h(i,j) = t 50 continue c c %---------------------------------------------% @@ -423,17 +423,17 @@ subroutine snapps do 60 j = 1, min(i+2,iend) t = c*h(j,i) + s*h(j,i+1) h(j,i+1) = -s*h(j,i) + c*h(j,i+1) - h(j,i) = t + h(j,i) = t 60 continue c c %----------------------------------------------------% c | Accumulate the rotation in the matrix Q; Q <- Q*G | c %----------------------------------------------------% c - do 70 j = 1, min( i+jj, kplusp ) + do 70 j = 1, min( i+jj, kplusp ) t = c*q(j,i) + s*q(j,i+1) q(j,i+1) = - s*q(j,i) + c*q(j,i+1) - q(j,i) = t + q(j,i) = t 70 continue c c %---------------------------% @@ -449,7 +449,7 @@ subroutine snapps c %-----------------------------------% c | Finished applying the real shift. | c %-----------------------------------% -c +c else c c %----------------------------------------------------% @@ -465,9 +465,9 @@ subroutine snapps c %---------------------------------------------------------% c s = 2.0*sigmar - t = slapy2 ( sigmar, sigmai ) + t = slapy2 ( sigmar, sigmai ) u(1) = ( h11 * (h11 - s) + t * t ) / h21 + h12 - u(2) = h11 + h22 - s + u(2) = h11 + h22 - s u(3) = h32 c do 90 i = istart, iend-1 @@ -507,7 +507,7 @@ subroutine snapps c | Accumulate the reflector in the matrix Q; Q <- Q*G | c %-----------------------------------------------------% c - call slarf ('Right', kplusp, nr, u, 1, tau, + call slarf ('Right', kplusp, nr, u, 1, tau, & q(1,i), ldq, workl) c c %----------------------------% @@ -526,7 +526,7 @@ subroutine snapps c | Finished applying a complex pair of shifts | c | to the current block | c %--------------------------------------------% -c +c end if c 100 continue @@ -568,7 +568,7 @@ subroutine snapps tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) ) if( tst1.eq.zero ) & tst1 = slanhs( '1', kev, h, ldh, workl ) - if( h( i+1,i ) .le. max( ulp*tst1, smlnum ) ) + if( h( i+1,i ) .le. max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 130 continue c @@ -581,9 +581,9 @@ subroutine snapps c %-------------------------------------------------% c if (h(kev+1,kev) .gt. zero) - & call sgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, + & call sgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, & workd(n+1), 1) -c +c c %----------------------------------------------------------% c | Compute column 1 to kev of (V*Q) in backward order | c | taking advantage of the upper Hessenberg structure of Q. | @@ -600,14 +600,14 @@ subroutine snapps c %-------------------------------------------------% c call slacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv) -c +c c %--------------------------------------------------------------% c | Copy the (kev+1)-st column of (V*Q) in the appropriate place | c %--------------------------------------------------------------% c if (h(kev+1,kev) .gt. zero) & call scopy (n, workd(n+1), 1, v(1,kev+1), 1) -c +c c %-------------------------------------% c | Update the residual vector: | c | r <- sigmak*r + betak*v(:,kev+1) | @@ -625,7 +625,7 @@ subroutine snapps & '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}') call svout (logfil, 1, h(kev+1,kev), ndigit, & '_napps: betak = e_{kev+1}^T*H*e_{kev}') - call ivout (logfil, 1, kev, ndigit, + call ivout (logfil, 1, [kev], ndigit, & '_napps: Order of the final Hessenberg matrix ') if (msglvl .gt. 2) then call smout (logfil, kev, kev, h, ldh, ndigit, @@ -633,11 +633,11 @@ subroutine snapps end if c end if -c +c 9000 continue - call second (t1) + call arscnd (t1) tnapps = tnapps + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/snaup2.f b/Toolbox/arpack-src/snaup2.f index 048cd50df..e3be754ea 100644 --- a/Toolbox/arpack-src/snaup2.f +++ b/Toolbox/arpack-src/snaup2.f @@ -2,13 +2,13 @@ c c\Name: snaup2 c -c\Description: +c\Description: c Intermediate level interface called by snaupd. c c\Usage: c call snaup2 c ( IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD, -c ISHIFT, MXITER, V, LDV, H, LDH, RITZR, RITZI, BOUNDS, +c ISHIFT, MXITER, V, LDV, H, LDH, RITZR, RITZI, BOUNDS, c Q, LDQ, WORKL, IPNTR, WORKD, INFO ) c c\Arguments @@ -17,52 +17,52 @@ c MODE, ISHIFT, MXITER: see the definition of IPARAM in snaupd. c c NP Integer. (INPUT/OUTPUT) -c Contains the number of implicit shifts to apply during -c each Arnoldi iteration. -c If ISHIFT=1, NP is adjusted dynamically at each iteration +c Contains the number of implicit shifts to apply during +c each Arnoldi iteration. +c If ISHIFT=1, NP is adjusted dynamically at each iteration c to accelerate convergence and prevent stagnation. -c This is also roughly equal to the number of matrix-vector +c This is also roughly equal to the number of matrix-vector c products (involving the operator OP) per Arnoldi iteration. c The logic for adjusting is contained within the current c subroutine. c If ISHIFT=0, NP is the number of shifts the user needs -c to provide via reverse comunication. 0 < NP < NCV-NEV. +c to provide via reverse communication. 0 < NP < NCV-NEV. c NP may be less than NCV-NEV for two reasons. The first, is -c to keep complex conjugate pairs of "wanted" Ritz values +c to keep complex conjugate pairs of "wanted" Ritz values c together. The second, is that a leading block of the current c upper Hessenberg matrix has split off and contains "unwanted" c Ritz values. -c Upon termination of the IRA iteration, NP contains the number +c Upon termination of the IRA iteration, NP contains the number c of "converged" wanted Ritz values. c c IUPD Integer. (INPUT) c IUPD .EQ. 0: use explicit restart instead implicit update. c IUPD .NE. 0: use implicit update. c -c V Real N by (NEV+NP) array. (INPUT/OUTPUT) -c The Arnoldi basis vectors are returned in the first NEV +c V Real N by (NEV+NP) array. (INPUT/OUTPUT) +c The Arnoldi basis vectors are returned in the first NEV c columns of V. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c -c H Real (NEV+NP) by (NEV+NP) array. (OUTPUT) +c H Real (NEV+NP) by (NEV+NP) array. (OUTPUT) c H is used to store the generated upper Hessenberg matrix c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c -c RITZR, Real arrays of length NEV+NP. (OUTPUT) +c RITZR, Real arrays of length NEV+NP. (OUTPUT) c RITZI RITZR(1:NEV) (resp. RITZI(1:NEV)) contains the real (resp. c imaginary) part of the computed Ritz values of OP. c -c BOUNDS Real array of length NEV+NP. (OUTPUT) -c BOUNDS(1:NEV) contain the error bounds corresponding to +c BOUNDS Real array of length NEV+NP. (OUTPUT) +c BOUNDS(1:NEV) contain the error bounds corresponding to c the computed Ritz values. -c -c Q Real (NEV+NP) by (NEV+NP) array. (WORKSPACE) +c +c Q Real (NEV+NP) by (NEV+NP) array. (WORKSPACE) c Private (replicated) work array used to accumulate the c rotation in the shift application step. c @@ -70,7 +70,7 @@ c Leading dimension of Q exactly as declared in the calling c program. c -c WORKL Real work array of length at least +c WORKL Real work array of length at least c (NEV+NP)**2 + 3*(NEV+NP). (INPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. It is used in shifts calculation, shifts @@ -82,20 +82,20 @@ c listed in the same order as returned from sneigh. c c If ISHIFT .EQ. O and IDO .EQ. 3, the first 2*NP locations -c of WORKL are used in reverse communication to hold the user +c of WORKL are used in reverse communication to hold the user c supplied shifts. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORKD for +c Pointer to mark the starting locations in the WORKD for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c -c WORKD Real work array of length 3*N. (WORKSPACE) +c +c WORKD Real work array of length 3*N. (WORKSPACE) c Distributed array to be used in the basic Arnoldi iteration c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration !!!!!!!!!! @@ -108,7 +108,7 @@ c Error flag on output. c = 0: Normal return. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. +c All possible eigenvalues of OP has been found. c NP returns the number of converged Ritz values. c = 2: No shifts could be applied. c = -8: Error return from LAPACK eigenvalue calculation; @@ -130,12 +130,12 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c sgetv0 ARPACK initial vector generation routine. +c sgetv0 ARPACK initial vector generation routine. c snaitr ARPACK Arnoldi factorization routine. c snapps ARPACK application of implicit shifts routine. c snconv ARPACK convergence of Ritz values routine. @@ -143,25 +143,25 @@ c sngets ARPACK reorder Ritz values and error bounds routine. c ssortc ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c smout ARPACK utility routine that prints matrices c svout ARPACK utility routine that prints vectors. c slamch LAPACK routine that determines machine constants. c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c scopy Level 1 BLAS that copies one vector to another . -c sdot Level 1 BLAS that computes the scalar product of two vectors. +c sdot Level 1 BLAS that computes the scalar product of two vectors. c snrm2 Level 1 BLAS that computes the norm of a vector. c sswap Level 1 BLAS that swaps two vectors. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c -c\SCCS Information: @(#) +c Rice University +c Houston, Texas +c +c\SCCS Information: @(#) c FILE: naup2.F SID: 2.8 DATE OF SID: 10/17/00 RELEASE: 2 c c\Remarks @@ -172,16 +172,16 @@ c----------------------------------------------------------------------- c subroutine snaup2 - & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, h, ldh, ritzr, ritzi, bounds, + & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, + & ishift, mxiter, v, ldv, h, ldh, ritzr, ritzi, bounds, & q, ldq, workl, ipntr, workd, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -200,7 +200,7 @@ subroutine snaup2 integer ipntr(13) Real & bounds(nev+np), h(ldh,nev+np), q(ldq,nev+np), resid(n), - & ritzi(nev+np), ritzr(nev+np), v(ldv,nev+np), + & ritzi(nev+np), ritzr(nev+np), v(ldv,nev+np), & workd(3*n), workl( (nev+np)*(nev+np+3) ) c c %------------% @@ -209,7 +209,7 @@ subroutine snaup2 c Real & one, zero - parameter (one = 1.0E+0, zero = 0.0E+0) + parameter (one = 1.0E+0 , zero = 0.0E+0 ) c c %---------------% c | Local Scalars | @@ -217,12 +217,12 @@ subroutine snaup2 c character wprime*2 logical cnorm , getv0, initv, update, ushift - integer ierr , iter , j , kplusp, msglvl, nconv, + integer ierr , iter , j , kplusp, msglvl, nconv, & nevbef, nev0 , np0 , nptemp, numcnv Real & rnorm , temp , eps23 save cnorm , getv0, initv, update, ushift, - & rnorm , iter , eps23, kplusp, msglvl, nconv , + & rnorm , iter , eps23, kplusp, msglvl, nconv , & nevbef, nev0 , np0 , numcnv c c %-----------------------% @@ -235,8 +235,8 @@ subroutine snaup2 c | External Subroutines | c %----------------------% c - external scopy , sgetv0, snaitr, snconv, sneigh, - & sngets, snapps, svout , ivout , second + external scopy , sgetv0, snaitr, snconv, sneigh, + & sngets, snapps, svout , ivout , arscnd c c %--------------------% c | External Functions | @@ -257,17 +257,17 @@ subroutine snaup2 c %-----------------------% c if (ido .eq. 0) then -c - call second (t0) -c +c + call arscnd (t0) +c msglvl = mnaup2 -c +c c %-------------------------------------% c | Get the machine dependent constant. | c %-------------------------------------% c eps23 = slamch('Epsilon-Machine') - eps23 = eps23**(2.0E+0 / 3.0E+0) + eps23 = eps23**(2.0E+0 / 3.0E+0 ) c nev0 = nev np0 = np @@ -284,7 +284,7 @@ subroutine snaup2 kplusp = nev + np nconv = 0 iter = 0 -c +c c %---------------------------------------% c | Set flags for computing the first NEV | c | steps of the Arnoldi factorization. | @@ -307,7 +307,7 @@ subroutine snaup2 initv = .false. end if end if -c +c c %---------------------------------------------% c | Get a possibly random starting vector and | c | force it into the range of the operator OP. | @@ -324,7 +324,7 @@ subroutine snaup2 if (rnorm .eq. zero) then c c %-----------------------------------------% -c | The initial vector is zero. Error exit. | +c | The initial vector is zero. Error exit. | c %-----------------------------------------% c info = -9 @@ -333,7 +333,7 @@ subroutine snaup2 getv0 = .false. ido = 0 end if -c +c c %-----------------------------------% c | Back from reverse communication : | c | continue with update step | @@ -353,14 +353,14 @@ subroutine snaup2 c %-------------------------------------% c if (cnorm) go to 100 -c +c c %----------------------------------------------------------% c | Compute the first NEV steps of the Arnoldi factorization | c %----------------------------------------------------------% c - call snaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, + call snaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, & h, ldh, ipntr, workd, info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -374,7 +374,7 @@ subroutine snaup2 info = -9999 go to 1200 end if -c +c c %--------------------------------------------------------------% c | | c | M A I N ARNOLDI I T E R A T I O N L O O P | @@ -382,16 +382,16 @@ subroutine snaup2 c | factorization in place. | c | | c %--------------------------------------------------------------% -c +c 1000 continue c iter = iter + 1 c if (msglvl .gt. 0) then - call ivout (logfil, 1, iter, ndigit, + call ivout (logfil, 1, [iter], ndigit, & '_naup2: **** Start of major iteration number ****') end if -c +c c %-----------------------------------------------------------% c | Compute NP additional steps of the Arnoldi factorization. | c | Adjust NP since NEV might have been updated by last call | @@ -401,9 +401,9 @@ subroutine snaup2 np = kplusp - nev c if (msglvl .gt. 1) then - call ivout (logfil, 1, nev, ndigit, + call ivout (logfil, 1, [nev], ndigit, & '_naup2: The length of the current Arnoldi factorization') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naup2: Extend the Arnoldi factorization by') end if c @@ -415,10 +415,10 @@ subroutine snaup2 20 continue update = .true. c - call snaitr (ido , bmat, n , nev, np , mode , resid, + call snaitr (ido , bmat, n , nev, np , mode , resid, & rnorm, v , ldv, h , ldh, ipntr, workd, & info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -435,10 +435,10 @@ subroutine snaup2 update = .false. c if (msglvl .gt. 1) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_naup2: Corresponding B-norm of the residual') end if -c +c c %--------------------------------------------------------% c | Compute the eigenvalues and corresponding error bounds | c | of the current upper Hessenberg matrix. | @@ -477,30 +477,30 @@ subroutine snaup2 nev = nev0 np = np0 numcnv = nev - call sngets (ishift, which, nev, np, ritzr, ritzi, + call sngets (ishift, which, nev, np, ritzr, ritzi, & bounds, workl, workl(np+1)) if (nev .eq. nev0+1) numcnv = nev0+1 -c +c c %-------------------% -c | Convergence test. | +c | Convergence test. | c %-------------------% c call scopy (nev, bounds(np+1), 1, workl(2*np+1), 1) - call snconv (nev, ritzr(np+1), ritzi(np+1), workl(2*np+1), + call snconv (nev, ritzr(np+1), ritzi(np+1), workl(2*np+1), & tol, nconv) -c +c if (msglvl .gt. 2) then kp(1) = nev kp(2) = np kp(3) = numcnv kp(4) = nconv - call ivout (logfil, 4, kp, ndigit, + call ivout (logfil, 4, kp, ndigit, & '_naup2: NEV, NP, NUMCNV, NCONV are') call svout (logfil, kplusp, ritzr, ndigit, & '_naup2: Real part of the eigenvalues of H') call svout (logfil, kplusp, ritzi, ndigit, & '_naup2: Imaginary part of the eigenvalues of H') - call svout (logfil, kplusp, bounds, ndigit, + call svout (logfil, kplusp, bounds, ndigit, & '_naup2: Ritz estimates of the current NCV Ritz values') end if c @@ -521,8 +521,8 @@ subroutine snaup2 nev = nev + 1 end if 30 continue -c - if ( (nconv .ge. numcnv) .or. +c + if ( (nconv .ge. numcnv) .or. & (iter .gt. mxiter) .or. & (np .eq. 0) ) then c @@ -536,7 +536,7 @@ subroutine snaup2 & ndigit, & '_naup2: Ritz eistmates computed by _neigh:') end if -c +c c %------------------------------------------------% c | Prepare to exit. Put the converged Ritz values | c | and corresponding bounds in RITZ(1:NCONV) and | @@ -598,7 +598,7 @@ subroutine snaup2 c c %----------------------------------------------------% c | Sort the Ritz values according to the scaled Ritz | -c | esitmates. This will push all the converged ones | +c | estimates. This will push all the converged ones | c | towards the front of ritzr, ritzi, bounds | c | (in the case when NCONV < NEV.) | c %----------------------------------------------------% @@ -635,13 +635,13 @@ subroutine snaup2 end if c c %------------------------------------% -c | Max iterations have been exceeded. | +c | Max iterations have been exceeded. | c %------------------------------------% c if (iter .gt. mxiter .and. nconv .lt. numcnv) info = 1 c c %---------------------% -c | No shifts to apply. | +c | No shifts to apply. | c %---------------------% c if (np .eq. 0 .and. nconv .lt. numcnv) info = 2 @@ -650,7 +650,7 @@ subroutine snaup2 go to 1100 c else if ( (nconv .lt. numcnv) .and. (ishift .eq. 1) ) then -c +c c %-------------------------------------------------% c | Do not have all the requested eigenvalues yet. | c | To prevent possible stagnation, adjust the size | @@ -664,26 +664,38 @@ subroutine snaup2 else if (nev .eq. 1 .and. kplusp .gt. 3) then nev = 2 end if +c %---- Scipy fix ------------------------------------------------ +c | We must keep nev below this value, as otherwise we can get +c | np == 0 (note that sngets below can bump nev by 1). If np == 0, +c | the next call to `snaitr` will write out-of-bounds. +c | + if (nev .gt. kplusp - 2) then + nev = kplusp - 2 + end if +c | +c %---- Scipy fix end -------------------------------------------- + +c np = kplusp - nev -c +c c %---------------------------------------% c | If the size of NEV was just increased | c | resort the eigenvalues. | c %---------------------------------------% -c - if (nevbef .lt. nev) - & call sngets (ishift, which, nev, np, ritzr, ritzi, +c + if (nevbef .lt. nev) + & call sngets (ishift, which, nev, np, ritzr, ritzi, & bounds, workl, workl(np+1)) c - end if -c + end if +c if (msglvl .gt. 0) then - call ivout (logfil, 1, nconv, ndigit, + call ivout (logfil, 1, [nconv], ndigit, & '_naup2: no. of "converged" Ritz values at this iter.') if (msglvl .gt. 1) then kp(1) = nev kp(2) = np - call ivout (logfil, 2, kp, ndigit, + call ivout (logfil, 2, kp, ndigit, & '_naup2: NEV and NP are') call svout (logfil, nev, ritzr(np+1), ndigit, & '_naup2: "wanted" Ritz values -- real part') @@ -697,7 +709,7 @@ subroutine snaup2 if (ishift .eq. 0) then c c %-------------------------------------------------------% -c | User specified shifts: reverse comminucation to | +c | User specified shifts: reverse communication to | c | compute the shifts. They are returned in the first | c | 2*NP locations of WORKL. | c %-------------------------------------------------------% @@ -706,7 +718,7 @@ subroutine snaup2 ido = 3 go to 9000 end if -c +c 50 continue c c %------------------------------------% @@ -718,7 +730,7 @@ subroutine snaup2 ushift = .false. c if ( ishift .eq. 0 ) then -c +c c %----------------------------------% c | Move the NP shifts from WORKL to | c | RITZR, RITZI to free up WORKL | @@ -729,14 +741,14 @@ subroutine snaup2 call scopy (np, workl(np+1), 1, ritzi, 1) end if c - if (msglvl .gt. 2) then - call ivout (logfil, 1, np, ndigit, + if (msglvl .gt. 2) then + call ivout (logfil, 1, [np], ndigit, & '_naup2: The number of shifts to apply ') call svout (logfil, np, ritzr, ndigit, & '_naup2: Real part of the shifts') call svout (logfil, np, ritzi, ndigit, & '_naup2: Imaginary part of the shifts') - if ( ishift .eq. 1 ) + if ( ishift .eq. 1 ) & call svout (logfil, np, bounds, ndigit, & '_naup2: Ritz estimates of the shifts') end if @@ -748,7 +760,7 @@ subroutine snaup2 c | The first 2*N locations of WORKD are used as workspace. | c %---------------------------------------------------------% c - call snapps (n, nev, np, ritzr, ritzi, v, ldv, + call snapps (n, nev, np, ritzr, ritzi, v, ldv, & h, ldh, resid, q, ldq, workl, workd) c c %---------------------------------------------% @@ -758,36 +770,36 @@ subroutine snaup2 c %---------------------------------------------% c cnorm = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call scopy (n, resid, 1, workd(n+1), 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*RESID | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd, 1) end if -c +c 100 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(1:N) := B*RESID | c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c - if (bmat .eq. 'G') then +c + if (bmat .eq. 'G') then rnorm = sdot (n, resid, 1, workd, 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then @@ -796,12 +808,12 @@ subroutine snaup2 cnorm = .false. c if (msglvl .gt. 2) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_naup2: B-norm of residual for compressed factorization') call smout (logfil, nev, nev, h, ldh, ndigit, & '_naup2: Compressed upper Hessenberg matrix H') end if -c +c go to 1000 c c %---------------------------------------------------------------% @@ -814,7 +826,7 @@ subroutine snaup2 c mxiter = iter nev = numcnv -c +c 1200 continue ido = 99 c @@ -822,9 +834,9 @@ subroutine snaup2 c | Error Exit | c %------------% c - call second (t1) + call arscnd (t1) tnaup2 = t1 - t0 -c +c 9000 continue c c %---------------% diff --git a/Toolbox/arpack-src/snaupd.f b/Toolbox/arpack-src/snaupd.f index 76592ad2c..d6fad3386 100644 --- a/Toolbox/arpack-src/snaupd.f +++ b/Toolbox/arpack-src/snaupd.f @@ -2,19 +2,19 @@ c c\Name: snaupd c -c\Description: +c\Description: c Reverse communication interface for the Implicitly Restarted Arnoldi -c iteration. This subroutine computes approximations to a few eigenpairs -c of a linear operator "OP" with respect to a semi-inner product defined by -c a symmetric positive semi-definite real matrix B. B may be the identity -c matrix. NOTE: If the linear operator "OP" is real and symmetric -c with respect to the real positive semi-definite symmetric matrix B, +c iteration. This subroutine computes approximations to a few eigenpairs +c of a linear operator "OP" with respect to a semi-inner product defined by +c a symmetric positive semi-definite real matrix B. B may be the identity +c matrix. NOTE: If the linear operator "OP" is real and symmetric +c with respect to the real positive semi-definite symmetric matrix B, c i.e. B*OP = (OP`)*B, then subroutine ssaupd should be used instead. c c The computed approximate eigenvalues are called Ritz values and c the corresponding approximate eigenvectors are called Ritz vectors. c -c snaupd is usually called iteratively to solve one of the +c snaupd is usually called iteratively to solve one of the c following problems: c c Mode 1: A*x = lambda*x. @@ -25,18 +25,18 @@ c ===> (If M can be factored see remark 3 below) c c Mode 3: A*x = lambda*M*x, M symmetric semi-definite -c ===> OP = Real_Part{ inv[A - sigma*M]*M } and B = M. +c ===> OP = Real_Part{ inv[A - sigma*M]*M } and B = M. c ===> shift-and-invert mode (in real arithmetic) -c If OP*x = amu*x, then +c If OP*x = amu*x, then c amu = 1/2 * [ 1/(lambda-sigma) + 1/(lambda-conjg(sigma)) ]. c Note: If sigma is real, i.e. imaginary part of sigma is zero; -c Real_Part{ inv[A - sigma*M]*M } == inv[A - sigma*M]*M -c amu == 1/(lambda-sigma). -c +c Real_Part{ inv[A - sigma*M]*M } == inv[A - sigma*M]*M +c amu == 1/(lambda-sigma). +c c Mode 4: A*x = lambda*M*x, M symmetric semi-definite -c ===> OP = Imaginary_Part{ inv[A - sigma*M]*M } and B = M. +c ===> OP = Imaginary_Part{ inv[A - sigma*M]*M } and B = M. c ===> shift-and-invert mode (in real arithmetic) -c If OP*x = amu*x, then +c If OP*x = amu*x, then c amu = 1/2i * [ 1/(lambda-sigma) - 1/(lambda-conjg(sigma)) ]. c c Both mode 3 and 4 give the same enhancement to eigenvalues close to @@ -63,7 +63,7 @@ c c\Arguments c IDO Integer. (INPUT/OUTPUT) -c Reverse communication flag. IDO must be zero on the first +c Reverse communication flag. IDO must be zero on the first c call to snaupd. IDO will be set internally to c indicate the type of operation to be performed. Control is c then given back to the calling routine which has the @@ -86,13 +86,13 @@ c IDO = 2: compute Y = B * X where c IPNTR(1) is the pointer into WORKD for X, c IPNTR(2) is the pointer into WORKD for Y. -c IDO = 3: compute the IPARAM(8) real and imaginary parts +c IDO = 3: compute the IPARAM(8) real and imaginary parts c of the shifts where INPTR(14) is the pointer c into WORKL for placing the shifts. See Remark c 5 below. c IDO = 99: done c ------------------------------------------------------------- -c +c c BMAT Character*1. (INPUT) c BMAT specifies the type of the matrix B that defines the c semi-inner product for the operator OP. @@ -110,18 +110,18 @@ c 'LI' -> want the NEV eigenvalues of largest imaginary part. c 'SI' -> want the NEV eigenvalues of smallest imaginary part. c -c NEV Integer. (INPUT/OUTPUT) +c NEV Integer. (INPUT) c Number of eigenvalues of OP to be computed. 0 < NEV < N-1. c -c TOL Real scalar. (INPUT) -c Stopping criterion: the relative accuracy of the Ritz value +c TOL Real scalar. (INPUT) +c Stopping criterion: the relative accuracy of the Ritz value c is considered acceptable if BOUNDS(I) .LE. TOL*ABS(RITZ(I)) c where ABS(RITZ(I)) is the magnitude when RITZ(I) is complex. c DEFAULT = SLAMCH('EPS') (machine precision as computed c by the LAPACK auxiliary subroutine SLAMCH). c -c RESID Real array of length N. (INPUT/OUTPUT) -c On INPUT: +c RESID Real array of length N. (INPUT/OUTPUT) +c On INPUT: c If INFO .EQ. 0, a random initial residual vector is used. c If INFO .NE. 0, RESID contains the initial residual vector, c possibly from a previous run. @@ -131,17 +131,17 @@ c NCV Integer. (INPUT) c Number of columns of the matrix V. NCV must satisfy the two c inequalities 2 <= NCV-NEV and NCV <= N. -c This will indicate how many Arnoldi vectors are generated -c at each iteration. After the startup phase in which NEV -c Arnoldi vectors are generated, the algorithm generates -c approximately NCV-NEV Arnoldi vectors at each subsequent update -c iteration. Most of the cost in generating each Arnoldi vector is -c in the matrix-vector operation OP*x. -c NOTE: 2 <= NCV-NEV in order that complex conjugate pairs of Ritz +c This will indicate how many Arnoldi vectors are generated +c at each iteration. After the startup phase in which NEV +c Arnoldi vectors are generated, the algorithm generates +c approximately NCV-NEV Arnoldi vectors at each subsequent update +c iteration. Most of the cost in generating each Arnoldi vector is +c in the matrix-vector operation OP*x. +c NOTE: 2 <= NCV-NEV in order that complex conjugate pairs of Ritz c values are kept together. (See remark 4 below) c -c V Real array N by NCV. (OUTPUT) -c Contains the final set of Arnoldi basis vectors. +c V Real array N by NCV. (OUTPUT) +c Contains the final set of Arnoldi basis vectors. c c LDV Integer. (INPUT) c Leading dimension of V exactly as declared in the calling program. @@ -154,11 +154,11 @@ c ISHIFT = 0: the shifts are provided by the user via c reverse communication. The real and imaginary c parts of the NCV eigenvalues of the Hessenberg -c matrix H are returned in the part of the WORKL -c array corresponding to RITZR and RITZI. See remark +c matrix H are returned in the part of the WORKL +c array corresponding to RITZR and RITZI. See remark c 5 below. c ISHIFT = 1: exact shifts with respect to the current -c Hessenberg matrix H. This is equivalent to +c Hessenberg matrix H. This is equivalent to c restarting the iteration with a starting vector c that is a linear combination of approximate Schur c vectors associated with the "wanted" Ritz values. @@ -167,8 +167,8 @@ c IPARAM(2) = No longer referenced. c c IPARAM(3) = MXITER -c On INPUT: maximum number of Arnoldi update iterations allowed. -c On OUTPUT: actual number of Arnoldi update iterations taken. +c On INPUT: maximum number of Arnoldi update iterations allowed. +c On OUTPUT: actual number of Arnoldi update iterations taken. c c IPARAM(4) = NB: blocksize to be used in the recurrence. c The code currently works only for NB = 1. @@ -178,11 +178,11 @@ c the convergence criterion. c c IPARAM(6) = IUPD -c No longer referenced. Implicit restarting is ALWAYS used. +c No longer referenced. Implicit restarting is ALWAYS used. c c IPARAM(7) = MODE c On INPUT determines what type of eigenproblem is being solved. -c Must be 1,2,3,4; See under \Description of snaupd for the +c Must be 1,2,3,4; See under \Description of snaupd for the c four modes available. c c IPARAM(8) = NP @@ -194,7 +194,7 @@ c IPARAM(9) = NUMOP, IPARAM(10) = NUMOPB, IPARAM(11) = NUMREO, c OUTPUT: NUMOP = total number of OP*x operations, c NUMOPB = total number of B*x operations if BMAT='G', -c NUMREO = total number of steps of re-orthogonalization. +c NUMREO = total number of steps of re-orthogonalization. c c IPNTR Integer array of length 14. (OUTPUT) c Pointer to mark the starting locations in the WORKD and WORKL @@ -202,13 +202,13 @@ c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X in WORKD. c IPNTR(2): pointer to the current result vector Y in WORKD. -c IPNTR(3): pointer to the vector B * X in WORKD when used in +c IPNTR(3): pointer to the vector B * X in WORKD when used in c the shift-and-invert mode. c IPNTR(4): pointer to the next available location in WORKL c that is untouched by the program. c IPNTR(5): pointer to the NCV by NCV upper Hessenberg matrix c H in WORKL. -c IPNTR(6): pointer to the real part of the ritz value array +c IPNTR(6): pointer to the real part of the ritz value array c RITZR in WORKL. c IPNTR(7): pointer to the imaginary part of the ritz value array c RITZI in WORKL. @@ -219,9 +219,9 @@ c c Note: IPNTR(9:13) is only referenced by sneupd. See Remark 2 below. c -c IPNTR(9): pointer to the real part of the NCV RITZ values of the +c IPNTR(9): pointer to the real part of the NCV RITZ values of the c original system. -c IPNTR(10): pointer to the imaginary part of the NCV RITZ values of +c IPNTR(10): pointer to the imaginary part of the NCV RITZ values of c the original system. c IPNTR(11): pointer to the NCV corresponding error bounds. c IPNTR(12): pointer to the NCV by NCV upper quasi-triangular @@ -230,17 +230,17 @@ c of the upper Hessenberg matrix H. Only referenced by c sneupd if RVEC = .TRUE. See Remark 2 below. c ------------------------------------------------------------- -c -c WORKD Real work array of length 3*N. (REVERSE COMMUNICATION) +c +c WORKD Real work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The user should not use WORKD +c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration. Upon termination c WORKD(1:N) contains B*RESID(1:N). If an invariant subspace c associated with the converged Ritz values is desired, see remark c 2 below, subroutine sneupd uses this output. -c See Data Distribution Note below. +c See Data Distribution Note below. c -c WORKL Real work array of length LWORKL. (OUTPUT/WORKSPACE) +c WORKL Real work array of length LWORKL. (OUTPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. See Data Distribution Note below. c @@ -254,18 +254,18 @@ c Error flag on output. c = 0: Normal exit. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. IPARAM(5) +c All possible eigenvalues of OP has been found. IPARAM(5) c returns the number of wanted converged Ritz values. c = 2: No longer an informational error. Deprecated starting c with release 2 of ARPACK. -c = 3: No shifts could be applied during a cycle of the -c Implicitly restarted Arnoldi iteration. One possibility -c is to increase the size of NCV relative to NEV. +c = 3: No shifts could be applied during a cycle of the +c Implicitly restarted Arnoldi iteration. One possibility +c is to increase the size of NCV relative to NEV. c See remark 4 below. c = -1: N must be positive. c = -2: NEV must be positive. c = -3: NCV-NEV >= 2 and less than or equal to N. -c = -4: The maximum number of Arnoldi update iteration +c = -4: The maximum number of Arnoldi update iteration c must be greater than zero. c = -5: WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI' c = -6: BMAT must be one of 'I' or 'G'. @@ -273,7 +273,7 @@ c = -8: Error return from LAPACK eigenvalue calculation; c = -9: Starting vector is zero. c = -10: IPARAM(7) must be 1,2,3,4. -c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatable. +c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatible. c = -12: IPARAM(1) must be equal to 0 or 1. c = -9999: Could not build an Arnoldi factorization. c IPARAM(5) returns the size of the current Arnoldi @@ -285,31 +285,31 @@ c Mode = 3 and 4. After convergence, approximate eigenvalues of the c original problem may be obtained with the ARPACK subroutine sneupd. c -c 2. If a basis for the invariant subspace corresponding to the converged Ritz -c values is needed, the user must call sneupd immediately following +c 2. If a basis for the invariant subspace corresponding to the converged Ritz +c values is needed, the user must call sneupd immediately following c completion of snaupd. This is new starting with release 2 of ARPACK. c c 3. If M can be factored into a Cholesky factorization M = LL` c then Mode = 2 should not be selected. Instead one should use -c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular +c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular c linear systems should be solved with L and L` rather c than computing inverses. After convergence, an approximate c eigenvector z of the original problem is recovered by solving c L`z = x where x is a Ritz vector of OP. c c 4. At present there is no a-priori analysis to guide the selection -c of NCV relative to NEV. The only formal requrement is that NCV > NEV + 2. +c of NCV relative to NEV. The only formal requirement is that NCV > NEV + 2. c However, it is recommended that NCV .ge. 2*NEV+1. If many problems of c the same type are to be solved, one should experiment with increasing -c NCV while keeping NEV fixed for a given test problem. This will +c NCV while keeping NEV fixed for a given test problem. This will c usually decrease the required number of OP*x operations but it c also increases the work and storage required to maintain the orthogonal c basis vectors. The optimal "cross-over" with respect to CPU time -c is problem dependent and must be determined empirically. +c is problem dependent and must be determined empirically. c See Chapter 8 of Reference 2 for further information. c -c 5. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the -c NP = IPARAM(8) real and imaginary parts of the shifts in locations +c 5. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the +c NP = IPARAM(8) real and imaginary parts of the shifts in locations c real part imaginary part c ----------------------- -------------- c 1 WORKL(IPNTR(14)) WORKL(IPNTR(14)+NP) @@ -319,10 +319,10 @@ c . . c NP WORKL(IPNTR(14)+NP-1) WORKL(IPNTR(14)+2*NP-1). c -c Only complex conjugate pairs of shifts may be applied and the pairs -c must be placed in consecutive locations. The real part of the -c eigenvalues of the current upper Hessenberg matrix are located in -c WORKL(IPNTR(6)) through WORKL(IPNTR(6)+NCV-1) and the imaginary part +c Only complex conjugate pairs of shifts may be applied and the pairs +c must be placed in consecutive locations. The real part of the +c eigenvalues of the current upper Hessenberg matrix are located in +c WORKL(IPNTR(6)) through WORKL(IPNTR(6)+NCV-1) and the imaginary part c in WORKL(IPNTR(7)) through WORKL(IPNTR(7)+NCV-1). They are ordered c according to the order defined by WHICH. The complex conjugate c pairs are kept together and the associated Ritz estimates are located in @@ -330,11 +330,11 @@ c c----------------------------------------------------------------------- c -c\Data Distribution Note: +c\Data Distribution Note: c c Fortran-D syntax: c ================ -c Real resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) +c Real resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) c decompose d1(n), d2(n,ncv) c align resid(i) with d1(i) c align v(i,j) with d2(i,j) @@ -346,13 +346,13 @@ c c Cray MPP syntax: c =============== -c Real resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) +c Real resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) c shared resid(block), v(block,:), workd(block,:) c replicated workl(lworkl) -c +c c CM2/CM5 syntax: c ============== -c +c c----------------------------------------------------------------------- c c include 'ex-nonsym.doc' @@ -368,7 +368,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett & Y. Saad, "Complex Shift and Invert Strategies for @@ -379,7 +379,7 @@ c snaup2 ARPACK routine that implements the Implicitly Restarted c Arnoldi Iteration. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c svout ARPACK utility routine that prints vectors. c slamch LAPACK routine that determines machine constants. c @@ -388,14 +388,14 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c 12/16/93: Version '1.1' c -c\SCCS Information: @(#) -c FILE: naupd.F SID: 2.10 DATE OF SID: 08/23/02 RELEASE: 2 +c\SCCS Information: @(#) +c FILE: naupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 c c\Remarks c @@ -404,15 +404,15 @@ c----------------------------------------------------------------------- c subroutine snaupd - & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, + & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, & ipntr, workd, workl, lworkl, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -437,13 +437,13 @@ subroutine snaupd c Real & one, zero - parameter (one = 1.0E+0, zero = 0.0E+0) + parameter (one = 1.0E+0 , zero = 0.0E+0 ) c c %---------------% c | Local Scalars | c %---------------% c - integer bounds, ierr, ih, iq, ishift, iupd, iw, + integer bounds, ierr, ih, iq, ishift, iupd, iw, & ldh, ldq, levec, mode, msglvl, mxiter, nb, & nev0, next, np, ritzi, ritzr, j save bounds, ih, iq, ishift, iupd, iw, ldh, ldq, @@ -454,7 +454,7 @@ subroutine snaupd c | External Subroutines | c %----------------------% c - external snaup2, svout, ivout, second, sstatn + external snaup2, svout, ivout, arscnd, sstatn c c %--------------------% c | External Functions | @@ -467,16 +467,16 @@ subroutine snaupd c %-----------------------% c | Executable Statements | c %-----------------------% -c +c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c call sstatn - call second (t0) + call arscnd (t0) msglvl = mnaupd c c %----------------% @@ -498,13 +498,13 @@ subroutine snaupd mode = iparam(7) c if (n .le. 0) then - ierr = -1 + ierr = -1 else if (nev .le. 0) then - ierr = -2 + ierr = -2 else if (ncv .le. nev+1 .or. ncv .gt. n) then - ierr = -3 - else if (mxiter .le. 0) then - ierr = 4 + ierr = -3 + else if (mxiter .le. 0) then + ierr = -4 else if (which .ne. 'LM' .and. & which .ne. 'SM' .and. & which .ne. 'LR' .and. @@ -517,13 +517,13 @@ subroutine snaupd else if (lworkl .lt. 3*ncv**2 + 6*ncv) then ierr = -7 else if (mode .lt. 1 .or. mode .gt. 4) then - ierr = -10 + ierr = -10 else if (mode .eq. 1 .and. bmat .eq. 'G') then - ierr = -11 + ierr = -11 else if (ishift .lt. 0 .or. ishift .gt. 1) then - ierr = -12 + ierr = -12 end if -c +c c %------------% c | Error Exit | c %------------% @@ -533,7 +533,7 @@ subroutine snaupd ido = 99 go to 9000 end if -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -549,8 +549,8 @@ subroutine snaupd c %----------------------------------------------% c np = ncv - nev - nev0 = nev -c + nev0 = nev +c c %-----------------------------% c | Zero out internal workspace | c %-----------------------------% @@ -558,7 +558,7 @@ subroutine snaupd do 10 j = 1, 3*ncv**2 + 6*ncv workl(j) = zero 10 continue -c +c c %-------------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -591,7 +591,7 @@ subroutine snaupd ipntr(6) = ritzr ipntr(7) = ritzi ipntr(8) = bounds - ipntr(14) = iw + ipntr(14) = iw c end if c @@ -599,12 +599,12 @@ subroutine snaupd c | Carry out the Implicitly restarted Arnoldi Iteration. | c %-------------------------------------------------------% c - call snaup2 + call snaup2 & ( ido, bmat, n, which, nev0, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritzr), - & workl(ritzi), workl(bounds), workl(iq), ldq, workl(iw), + & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritzr), + & workl(ritzi), workl(bounds), workl(iq), ldq, workl(iw), & ipntr, workd, info ) -c +c c %--------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP or shifts. | @@ -612,7 +612,7 @@ subroutine snaupd c if (ido .eq. 3) iparam(8) = np if (ido .ne. 99) go to 9000 -c +c iparam(3) = mxiter iparam(5) = np iparam(9) = nopx @@ -628,19 +628,19 @@ subroutine snaupd if (info .eq. 2) info = 3 c if (msglvl .gt. 0) then - call ivout (logfil, 1, mxiter, ndigit, + call ivout (logfil, 1, [mxiter], ndigit, & '_naupd: Number of update iterations taken') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naupd: Number of wanted "converged" Ritz values') - call svout (logfil, np, workl(ritzr), ndigit, + call svout (logfil, np, workl(ritzr), ndigit, & '_naupd: Real part of the final Ritz values') - call svout (logfil, np, workl(ritzi), ndigit, + call svout (logfil, np, workl(ritzi), ndigit, & '_naupd: Imaginary part of the final Ritz values') - call svout (logfil, np, workl(bounds), ndigit, + call svout (logfil, np, workl(bounds), ndigit, & '_naupd: Associated Ritz estimates') end if c - call second (t1) + call arscnd (t1) tnaupd = t1 - t0 c if (msglvl .gt. 0) then @@ -656,8 +656,8 @@ subroutine snaupd 1000 format (//, & 5x, '=============================================',/ & 5x, '= Nonsymmetric implicit Arnoldi update code =',/ - & 5x, '= Version Number: ', ' 2.4', 21x, ' =',/ - & 5x, '= Version Date: ', ' 07/31/96', 16x, ' =',/ + & 5x, '= Version Number: ', ' 2.4' , 21x, ' =',/ + & 5x, '= Version Date: ', ' 07/31/96' , 16x, ' =',/ & 5x, '=============================================',/ & 5x, '= Summary of timing statistics =',/ & 5x, '=============================================',//) diff --git a/Toolbox/arpack-src/snconv.f b/Toolbox/arpack-src/snconv.f index 7194c7584..af94700a9 100644 --- a/Toolbox/arpack-src/snconv.f +++ b/Toolbox/arpack-src/snconv.f @@ -3,7 +3,7 @@ c c\Name: snconv c -c\Description: +c\Description: c Convergence testing for the nonsymmetric Arnoldi eigenvalue routine. c c\Usage: @@ -38,22 +38,22 @@ c xxxxxx real c c\Routines called: -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c slamch LAPACK routine that determines machine constants. c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University +c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: nconv.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\Remarks @@ -69,8 +69,8 @@ subroutine snconv (n, ritzr, ritzi, bounds, tol, nconv) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -106,7 +106,7 @@ subroutine snconv (n, ritzr, ritzi, bounds, tol, nconv) c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %-------------------------------------------------------------% c | Convergence test: unlike in the symmetric code, I am not | c | using things like refined error bounds and gap condition | @@ -119,7 +119,7 @@ subroutine snconv (n, ritzr, ritzi, bounds, tol, nconv) c | for some appropriate choice of norm. | c %-------------------------------------------------------------% c - call second (t0) + call arscnd (t0) c c %---------------------------------% c | Get machine dependent constant. | @@ -133,10 +133,10 @@ subroutine snconv (n, ritzr, ritzi, bounds, tol, nconv) temp = max( eps23, slapy2( ritzr(i), ritzi(i) ) ) if (bounds(i) .le. tol*temp) nconv = nconv + 1 20 continue -c - call second (t1) +c + call arscnd (t1) tnconv = tnconv + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/sneigh.f b/Toolbox/arpack-src/sneigh.f index 96ae87ff2..7ffb48658 100644 --- a/Toolbox/arpack-src/sneigh.f +++ b/Toolbox/arpack-src/sneigh.f @@ -13,7 +13,7 @@ c c\Arguments c RNORM Real scalar. (INPUT) -c Residual norm corresponding to the current upper Hessenberg +c Residual norm corresponding to the current upper Hessenberg c matrix H. c c N Integer. (INPUT) @@ -27,13 +27,13 @@ c program. c c RITZR, Real arrays of length N. (OUTPUT) -c RITZI On output, RITZR(1:N) (resp. RITZI(1:N)) contains the real +c RITZI On output, RITZR(1:N) (resp. RITZI(1:N)) contains the real c (respectively imaginary) parts of the eigenvalues of H. c c BOUNDS Real array of length N. (OUTPUT) c On output, BOUNDS contains the Ritz estimates associated with -c the eigenvalues RITZR and RITZI. This is equal to RNORM -c times the last components of the eigenvectors corresponding +c the eigenvalues RITZR and RITZI. This is equal to RNORM +c times the last components of the eigenvectors corresponding c to the eigenvalues in RITZR and RITZI. c c Q Real N by N array. (WORKSPACE) @@ -49,7 +49,7 @@ c of H and also in the calculation of the eigenvectors of H. c c IERR Integer. (OUTPUT) -c Error exit flag from slaqrb or strevc. +c Error exit flag from slahqr or strevc. c c\EndDoc c @@ -61,9 +61,9 @@ c xxxxxx real c c\Routines called: -c slaqrb ARPACK routine to compute the real Schur form of an +c slahqr LAPACK routine to compute the real Schur form of an c upper Hessenberg matrix and last row of the Schur vectors. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c smout ARPACK utility routine that prints matrices c svout ARPACK utility routine that prints vectors. c slacpy LAPACK matrix copy routine. @@ -74,20 +74,20 @@ c scopy Level 1 BLAS that copies one vector to another . c snrm2 Level 1 BLAS that computes the norm of a vector. c sscal Level 1 BLAS that scales a vector. -c +c c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: neigh.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\Remarks @@ -97,54 +97,54 @@ c c----------------------------------------------------------------------- c - subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, + subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, & q, ldq, workl, ierr) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | c %------------------% c integer ierr, n, ldh, ldq - Real + Real & rnorm c c %-----------------% c | Array Arguments | c %-----------------% c - Real + Real & bounds(n), h(ldh,n), q(ldq,n), ritzi(n), ritzr(n), & workl(n*(n+3)) -c +c c %------------% c | Parameters | c %------------% c - Real + Real & one, zero parameter (one = 1.0E+0, zero = 0.0E+0) -c +c c %------------------------% c | Local Scalars & Arrays | c %------------------------% c logical select(1) integer i, iconj, msglvl - Real + Real & temp, vl(1) c c %----------------------% c | External Subroutines | c %----------------------% c - external scopy, slacpy, slaqrb, strevc, svout, second + external scopy, slacpy, slahqr, strevc, svout, arscnd c c %--------------------% c | External Functions | @@ -170,25 +170,29 @@ subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mneigh -c +c if (msglvl .gt. 2) then - call smout (logfil, n, n, h, ldh, ndigit, + call smout (logfil, n, n, h, ldh, ndigit, & '_neigh: Entering upper Hessenberg matrix H ') end if -c +c c %-----------------------------------------------------------% c | 1. Compute the eigenvalues, the last components of the | c | corresponding Schur vectors and the full Schur form T | c | of the current upper Hessenberg matrix H. | -c | slaqrb returns the full Schur form of H in WORKL(1:N**2) | +c | slahqr returns the full Schur form of H in WORKL(1:N**2) | c | and the last components of the Schur vectors in BOUNDS. | c %-----------------------------------------------------------% c call slacpy ('All', n, n, h, ldh, workl, n) - call slaqrb (.true., n, 1, n, workl, n, ritzr, ritzi, bounds, - & ierr) + do 5 j = 1, n-1 + bounds(j) = zero + 5 continue + bounds(n) = one + call slahqr(.true., .true., n, 1, n, workl, n, ritzr, ritzi, 1, 1, + & bounds, 1, ierr) if (ierr .ne. 0) go to 9000 c if (msglvl .gt. 1) then @@ -227,7 +231,7 @@ subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c %----------------------% c | Real eigenvalue case | c %----------------------% -c +c temp = snrm2( n, q(1,i), 1 ) call sscal ( n, one / temp, q(1,i), 1 ) else @@ -241,7 +245,7 @@ subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c %-------------------------------------------% c if (iconj .eq. 0) then - temp = slapy2( snrm2( n, q(1,i), 1 ), + temp = slapy2( snrm2( n, q(1,i), 1 ), & snrm2( n, q(1,i+1), 1 ) ) call sscal ( n, one / temp, q(1,i), 1 ) call sscal ( n, one / temp, q(1,i+1), 1 ) @@ -249,7 +253,7 @@ subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, else iconj = 0 end if - end if + end if 10 continue c call sgemv ('T', n, n, one, q, ldq, bounds, 1, zero, workl, 1) @@ -270,7 +274,7 @@ subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, c %----------------------% c | Real eigenvalue case | c %----------------------% -c +c bounds(i) = rnorm * abs( workl(i) ) else c @@ -301,7 +305,7 @@ subroutine sneigh (rnorm, n, h, ldh, ritzr, ritzi, bounds, & '_neigh: Ritz estimates for the eigenvalues of H') end if c - call second (t1) + call arscnd (t1) tneigh = tneigh + (t1 - t0) c 9000 continue diff --git a/Toolbox/arpack-src/sneupd.f b/Toolbox/arpack-src/sneupd.f index 890140b80..1c2c7ce16 100644 --- a/Toolbox/arpack-src/sneupd.f +++ b/Toolbox/arpack-src/sneupd.f @@ -2,7 +2,7 @@ c c\Name: sneupd c -c\Description: +c\Description: c c This subroutine returns the converged approximations to eigenvalues c of A*z = lambda*B*z and (optionally): @@ -28,34 +28,34 @@ c invariant subspace corresponding to these Ritz values is referred to as a c Schur basis. c -c See documentation in the header of the subroutine SNAUPD for +c See documentation in the header of the subroutine SNAUPD for c definition of OP as well as other terms and the relation of computed c Ritz values and Ritz vectors of OP with respect to the given problem -c A*z = lambda*B*z. For a brief description, see definitions of +c A*z = lambda*B*z. For a brief description, see definitions of c IPARAM(7), MODE and WHICH in the documentation of SNAUPD. c c\Usage: -c call sneupd -c ( RVEC, HOWMNY, SELECT, DR, DI, Z, LDZ, SIGMAR, SIGMAI, WORKEV, BMAT, -c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, WORKL, +c call sneupd +c ( RVEC, HOWMNY, SELECT, DR, DI, Z, LDZ, SIGMAR, SIGMAI, WORKEV, BMAT, +c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, WORKL, c LWORKL, INFO ) c c\Arguments: -c RVEC LOGICAL (INPUT) -c Specifies whether a basis for the invariant subspace corresponding -c to the converged Ritz value approximations for the eigenproblem +c RVEC LOGICAL (INPUT) +c Specifies whether a basis for the invariant subspace corresponding +c to the converged Ritz value approximations for the eigenproblem c A*z = lambda*B*z is computed. c c RVEC = .FALSE. Compute Ritz values only. c c RVEC = .TRUE. Compute the Ritz vectors or Schur vectors. -c See Remarks below. -c -c HOWMNY Character*1 (INPUT) -c Specifies the form of the basis for the invariant subspace +c See Remarks below. +c +c HOWMNY Character*1 (INPUT) +c Specifies the form of the basis for the invariant subspace c corresponding to the converged Ritz values that is to be computed. c -c = 'A': Compute NEV Ritz vectors; +c = 'A': Compute NEV Ritz vectors; c = 'P': Compute NEV Schur vectors; c = 'S': compute some of the Ritz vectors, specified c by the logical array SELECT. @@ -63,43 +63,43 @@ c SELECT Logical array of dimension NCV. (INPUT) c If HOWMNY = 'S', SELECT specifies the Ritz vectors to be c computed. To select the Ritz vector corresponding to a -c Ritz value (DR(j), DI(j)), SELECT(j) must be set to .TRUE.. +c Ritz value (DR(j), DI(j)), SELECT(j) must be set to .TRUE.. c If HOWMNY = 'A' or 'P', SELECT is used as internal workspace. c c DR Real array of dimension NEV+1. (OUTPUT) -c If IPARAM(7) = 1,2 or 3 and SIGMAI=0.0 then on exit: DR contains -c the real part of the Ritz approximations to the eigenvalues of -c A*z = lambda*B*z. +c If IPARAM(7) = 1,2 or 3 and SIGMAI=0.0 then on exit: DR contains +c the real part of the Ritz approximations to the eigenvalues of +c A*z = lambda*B*z. c If IPARAM(7) = 3, 4 and SIGMAI is not equal to zero, then on exit: -c DR contains the real part of the Ritz values of OP computed by +c DR contains the real part of the Ritz values of OP computed by c SNAUPD. A further computation must be performed by the user c to transform the Ritz values computed for OP by SNAUPD to those c of the original system A*z = lambda*B*z. See remark 3 below. c c DI Real array of dimension NEV+1. (OUTPUT) -c On exit, DI contains the imaginary part of the Ritz value +c On exit, DI contains the imaginary part of the Ritz value c approximations to the eigenvalues of A*z = lambda*B*z associated c with DR. c -c NOTE: When Ritz values are complex, they will come in complex -c conjugate pairs. If eigenvectors are requested, the -c corresponding Ritz vectors will also come in conjugate -c pairs and the real and imaginary parts of these are -c represented in two consecutive columns of the array Z +c NOTE: When Ritz values are complex, they will come in complex +c conjugate pairs. If eigenvectors are requested, the +c corresponding Ritz vectors will also come in conjugate +c pairs and the real and imaginary parts of these are +c represented in two consecutive columns of the array Z c (see below). c c Z Real N by NEV+1 array if RVEC = .TRUE. and HOWMNY = 'A'. (OUTPUT) -c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of -c Z represent approximate eigenvectors (Ritz vectors) corresponding -c to the NCONV=IPARAM(5) Ritz values for eigensystem -c A*z = lambda*B*z. -c -c The complex Ritz vector associated with the Ritz value -c with positive imaginary part is stored in two consecutive -c columns. The first column holds the real part of the Ritz -c vector and the second column holds the imaginary part. The -c Ritz vector associated with the Ritz value with negative -c imaginary part is simply the complex conjugate of the Ritz vector +c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of +c Z represent approximate eigenvectors (Ritz vectors) corresponding +c to the NCONV=IPARAM(5) Ritz values for eigensystem +c A*z = lambda*B*z. +c +c The complex Ritz vector associated with the Ritz value +c with positive imaginary part is stored in two consecutive +c columns. The first column holds the real part of the Ritz +c vector and the second column holds the imaginary part. The +c Ritz vector associated with the Ritz value with negative +c imaginary part is simply the complex conjugate of the Ritz vector c associated with the positive imaginary part. c c If RVEC = .FALSE. or HOWMNY = 'P', then Z is not referenced. @@ -114,11 +114,11 @@ c desired, then LDZ >= max( 1, N ). In any case, LDZ >= 1. c c SIGMAR Real (INPUT) -c If IPARAM(7) = 3 or 4, represents the real part of the shift. +c If IPARAM(7) = 3 or 4, represents the real part of the shift. c Not referenced if IPARAM(7) = 1 or 2. c c SIGMAI Real (INPUT) -c If IPARAM(7) = 3 or 4, represents the imaginary part of the shift. +c If IPARAM(7) = 3 or 4, represents the imaginary part of the shift. c Not referenced if IPARAM(7) = 1 or 2. See remark 3 below. c c WORKEV Real work array of dimension 3*NCV. (WORKSPACE) @@ -183,10 +183,10 @@ c c = 1: The Schur form computed by LAPACK routine slahqr c could not be reordered by LAPACK routine strsen. -c Re-enter subroutine sneupd with IPARAM(5)=NCV and -c increase the size of the arrays DR and DI to have -c dimension at least dimension NCV and allocate at least NCV -c columns for Z. NOTE: Not necessary if Z and V share +c Re-enter subroutine sneupd with IPARAM(5)=NCV and +c increase the size of the arrays DR and DI to have +c dimension at least dimension NCV and allocate at least NCV +c columns for Z. NOTE: Not necessary if Z and V share c the same space. Please notify the authors if this error c occurs. c @@ -218,7 +218,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett & Y. Saad, "Complex Shift and Invert Strategies for @@ -229,7 +229,7 @@ c ivout ARPACK utility routine that prints integers. c smout ARPACK utility routine that prints matrices c svout ARPACK utility routine that prints vectors. -c sgeqr2 LAPACK routine that computes the QR factorization of +c sgeqr2 LAPACK routine that computes the QR factorization of c a matrix. c slacpy LAPACK matrix copy routine. c slahqr LAPACK routine to compute the real Schur form of an @@ -237,7 +237,7 @@ c slamch LAPACK routine that determines machine constants. c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c slaset LAPACK matrix initialization routine. -c sorm2r LAPACK routine that applies an orthogonal matrix in +c sorm2r LAPACK routine that applies an orthogonal matrix in c factored form. c strevc LAPACK routine to compute the eigenvectors of a matrix c in upper quasi-triangular form. @@ -259,10 +259,10 @@ c Ritz vectors. Thus, their numerical properties are often superior. c If RVEC = .TRUE. then the relationship c A * V(:,1:IPARAM(5)) = V(:,1:IPARAM(5)) * T, and -c trans(V(:,1:IPARAM(5))) * V(:,1:IPARAM(5)) = I are approximately -c satisfied. Here T is the leading submatrix of order IPARAM(5) of the +c trans(V(:,1:IPARAM(5))) * V(:,1:IPARAM(5)) = I are approximately +c satisfied. Here T is the leading submatrix of order IPARAM(5) of the c real upper quasi-triangular matrix stored workl(ipntr(12)). That is, -c T is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +c T is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; c each 2-by-2 diagonal block has its diagonal elements equal and its c off-diagonal elements of opposite sign. Corresponding to each 2-by-2 c diagonal block is a complex conjugate pair of Ritz values. The real @@ -270,14 +270,14 @@ c c 3. If IPARAM(7) = 3 or 4 and SIGMAI is not equal zero, then the user must c form the IPARAM(5) Rayleigh quotients in order to transform the Ritz -c values computed by SNAUPD for OP to those of A*z = lambda*B*z. +c values computed by SNAUPD for OP to those of A*z = lambda*B*z. c Set RVEC = .true. and HOWMNY = 'A', and -c compute +c compute c trans(Z(:,I)) * A * Z(:,I) if DI(I) = 0. -c If DI(I) is not equal to zero and DI(I+1) = - D(I), +c If DI(I) is not equal to zero and DI(I+1) = - D(I), c then the desired real and imaginary parts of the Ritz value are c trans(Z(:,I)) * A * Z(:,I) + trans(Z(:,I+1)) * A * Z(:,I+1), -c trans(Z(:,I)) * A * Z(:,I+1) - trans(Z(:,I+1)) * A * Z(:,I), +c trans(Z(:,I)) * A * Z(:,I+1) - trans(Z(:,I+1)) * A * Z(:,I), c respectively. c Another possibility is to set RVEC = .true. and HOWMNY = 'P' and c compute trans(V(:,1:IPARAM(5))) * A * V(:,1:IPARAM(5)) and then an upper @@ -286,20 +286,20 @@ c c\Authors c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University +c Richard Lehoucq CRPC / Rice University c Chao Yang Houston, Texas c Dept. of Computational & -c Applied Mathematics -c Rice University -c Houston, Texas -c -c\SCCS Information: @(#) -c FILE: neupd.F SID: 2.7 DATE OF SID: 09/20/00 RELEASE: 2 +c Applied Mathematics +c Rice University +c Houston, Texas +c +c\SCCS Information: @(#) +c FILE: neupd.F SID: 2.7 DATE OF SID: 09/20/00 RELEASE: 2 c c\EndLib c c----------------------------------------------------------------------- - subroutine sneupd(rvec , howmny, select, dr , di, + subroutine sneupd(rvec , howmny, select, dr , di, & z , ldz , sigmar, sigmai, workev, & bmat , n , which , nev , tol, & resid, ncv , v , ldv , iparam, @@ -309,8 +309,8 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -319,7 +319,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, character bmat, howmny, which*2 logical rvec integer info, ldz, ldv, lworkl, n, ncv, nev - Real + Real & sigmar, sigmai, tol c c %-----------------% @@ -328,16 +328,16 @@ subroutine sneupd(rvec , howmny, select, dr , di, c integer iparam(11), ipntr(14) logical select(ncv) - Real - & dr(nev+1) , di(nev+1), resid(n) , - & v(ldv,ncv) , z(ldz,*) , workd(3*n), + Real + & dr(nev+1) , di(nev+1), resid(n) , + & v(ldv,ncv) , z(ldz,*) , workd(3*n), & workl(lworkl), workev(3*ncv) c c %------------% c | Parameters | c %------------% c - Real + Real & one, zero parameter (one = 1.0E+0 , zero = 0.0E+0 ) c @@ -346,16 +346,16 @@ subroutine sneupd(rvec , howmny, select, dr , di, c %---------------% c character type*6 - integer bounds, ierr , ih , ihbds , - & iheigr, iheigi, iconj , nconv , + integer bounds, ierr , ih , ihbds , + & iheigr, iheigi, iconj , nconv , & invsub, iuptri, iwev , iwork(1), & j , k , ldh , ldq , & mode , msglvl, outncv, ritzr , & ritzi , wri , wrr , irr , & iri , ibd , ishift, numcnv , - & np , jj + & np , jj , nconv2 logical reord - Real + Real & conds , rnorm, sep , temp, & vl(1,1), temp1, eps23 c @@ -363,16 +363,16 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | External Subroutines | c %----------------------% c - external scopy , sger , sgeqr2, slacpy, - & slahqr, slaset, smout , sorm2r, - & strevc, strmm , strsen, sscal , + external scopy , sger , sgeqr2, slacpy, + & slahqr, slaset, smout , sorm2r, + & strevc, strmm , strsen, sscal , & svout , ivout c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & slapy2, snrm2, slamch, sdot external slapy2, snrm2, slamch, sdot c @@ -385,7 +385,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -434,7 +434,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, else if (howmny .eq. 'S' ) then ierr = -12 end if -c +c if (mode .eq. 1 .or. mode .eq. 2) then type = 'REGULR' else if (mode .eq. 3 .and. sigmai .eq. zero) then @@ -443,7 +443,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, type = 'REALPT' else if (mode .eq. 4 ) then type = 'IMAGPT' - else + else ierr = -10 end if if (mode .eq. 1 .and. bmat .eq. 'G') ierr = -11 @@ -456,7 +456,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, info = ierr go to 9000 end if -c +c c %--------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -483,7 +483,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | subspace for H. | c | GRAND total of NCV * ( 3 * NCV + 6 ) locations. | c %-----------------------------------------------------------% -c +c ih = ipntr(5) ritzr = ipntr(6) ritzi = ipntr(7) @@ -537,7 +537,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, end if c if (rvec) then -c +c reord = .false. c c %---------------------------------------------------% @@ -562,7 +562,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, c np = ncv - nev ishift = 0 - call sngets(ishift , which , nev , + call sngets(ishift , which , nev , & np , workl(irr), workl(iri), & workl(bounds), workl , workl(np+1)) c @@ -589,7 +589,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, & workl(ibd+jj-1) .le. tol*temp1) then select(jj) = .true. numcnv = numcnv + 1 - if (jj .gt. nev) reord = .true. + if (jj .gt. nconv) reord = .true. endif 11 continue c @@ -601,9 +601,9 @@ subroutine sneupd(rvec , howmny, select, dr , di, c %-----------------------------------------------------------% c if (msglvl .gt. 2) then - call ivout(logfil, 1, numcnv, ndigit, + call ivout(logfil, 1, [numcnv], ndigit, & '_neupd: Number of specified eigenvalues') - call ivout(logfil, 1, nconv, ndigit, + call ivout(logfil, 1, [nconv], ndigit, & '_neupd: Number of "converged" eigenvalues') end if c @@ -618,24 +618,24 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | Make a copy of the upper Hessenberg matrix. | c | Initialize the Schur vector matrix Q to the identity. | c %-----------------------------------------------------------% -c +c call scopy(ldh*ncv, workl(ih), 1, workl(iuptri), 1) - call slaset('All', ncv, ncv, + call slaset('All', ncv, ncv, & zero , one, workl(invsub), & ldq) - call slahqr(.true., .true. , ncv, - & 1 , ncv , workl(iuptri), + call slahqr(.true., .true. , ncv, + & 1 , ncv , workl(iuptri), & ldh , workl(iheigr), workl(iheigi), - & 1 , ncv , workl(invsub), + & 1 , ncv , workl(invsub), & ldq , ierr) - call scopy(ncv , workl(invsub+ncv-1), ldq, + call scopy(ncv , workl(invsub+ncv-1), ldq, & workl(ihbds), 1) -c +c if (ierr .ne. 0) then info = -8 go to 9000 end if -c +c if (msglvl .gt. 1) then call svout(logfil, ncv, workl(iheigr), ndigit, & '_neupd: Real part of the eigenvalues of H') @@ -644,28 +644,32 @@ subroutine sneupd(rvec , howmny, select, dr , di, call svout(logfil, ncv, workl(ihbds), ndigit, & '_neupd: Last row of the Schur vector matrix') if (msglvl .gt. 3) then - call smout(logfil , ncv, ncv , + call smout(logfil , ncv, ncv , & workl(iuptri), ldh, ndigit, & '_neupd: The upper quasi-triangular matrix ') end if - end if + end if c if (reord) then -c +c c %-----------------------------------------------------% -c | Reorder the computed upper quasi-triangular matrix. | +c | Reorder the computed upper quasi-triangular matrix. | c %-----------------------------------------------------% -c - call strsen('None' , 'V' , +c + call strsen('None' , 'V' , & select , ncv , - & workl(iuptri), ldh , - & workl(invsub), ldq , - & workl(iheigr), workl(iheigi), - & nconv , conds , - & sep , workl(ihbds) , + & workl(iuptri), ldh , + & workl(invsub), ldq , + & workl(iheigr), workl(iheigi), + & nconv2 , conds , + & sep , workl(ihbds) , & ncv , iwork , & 1 , ierr) c + if (nconv2 .lt. nconv) then + nconv = nconv2 + end if + if (ierr .eq. 1) then info = 1 go to 9000 @@ -677,12 +681,12 @@ subroutine sneupd(rvec , howmny, select, dr , di, call svout(logfil, ncv, workl(iheigi), ndigit, & '_neupd: Imag part of the eigenvalues of H--reordered') if (msglvl .gt. 3) then - call smout(logfil , ncv, ncv , + call smout(logfil , ncv, ncv , & workl(iuptri), ldq, ndigit, & '_neupd: Quasi-triangular matrix after re-ordering') end if end if -c +c end if c c %---------------------------------------% @@ -699,23 +703,23 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | if a spectral transformation was not used. | c %----------------------------------------------------% c - if (type .eq. 'REGULR') then + if (type .eq. 'REGULR') then call scopy(nconv, workl(iheigr), 1, dr, 1) call scopy(nconv, workl(iheigi), 1, di, 1) end if -c +c c %----------------------------------------------------------% c | Compute the QR factorization of the matrix representing | c | the wanted invariant subspace located in the first NCONV | c | columns of workl(invsub,ldq). | c %----------------------------------------------------------% -c - call sgeqr2(ncv, nconv , workl(invsub), +c + call sgeqr2(ncv, nconv , workl(invsub), & ldq, workev, workev(ncv+1), & ierr) c c %---------------------------------------------------------% -c | * Postmultiply V by Q using sorm2r. | +c | * Postmultiply V by Q using sorm2r. | c | * Copy the first NCONV columns of VQ into Z. | c | * Postmultiply Z by R. | c | The N by NCONV matrix Z is now a matrix representation | @@ -725,15 +729,15 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | vectors associated with the real upper quasi-triangular | c | matrix of order NCONV in workl(iuptri) | c %---------------------------------------------------------% -c - call sorm2r('Right', 'Notranspose', n , +c + call sorm2r('Right', 'Notranspose', n , & ncv , nconv , workl(invsub), - & ldq , workev , v , + & ldq , workev , v , & ldv , workd(n+1) , ierr) call slacpy('All', n, nconv, v, ldv, z, ldz) c do 20 j=1, nconv -c +c c %---------------------------------------------------% c | Perform both a column and row scaling if the | c | diagonal element of workl(invsub,ldq) is negative | @@ -742,21 +746,21 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | Note that since Q is orthogonal, R is a diagonal | c | matrix consisting of plus or minus ones | c %---------------------------------------------------% -c +c if (workl(invsub+(j-1)*ldq+j-1) .lt. zero) then call sscal(nconv, -one, workl(iuptri+j-1), ldq) call sscal(nconv, -one, workl(iuptri+(j-1)*ldq), 1) end if -c +c 20 continue -c +c if (howmny .eq. 'A') then -c +c c %--------------------------------------------% -c | Compute the NCONV wanted eigenvectors of T | +c | Compute the NCONV wanted eigenvectors of T | c | located in workl(iuptri,ldq). | c %--------------------------------------------% -c +c do 30 j=1, ncv if (j .le. nconv) then select(j) = .true. @@ -765,8 +769,8 @@ subroutine sneupd(rvec , howmny, select, dr , di, end if 30 continue c - call strevc('Right', 'Select' , select , - & ncv , workl(iuptri), ldq , + call strevc('Right', 'Select' , select , + & ncv , workl(iuptri), ldq , & vl , 1 , workl(invsub), & ldq , ncv , outncv , & workev , ierr) @@ -775,7 +779,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, info = -9 go to 9000 end if -c +c c %------------------------------------------------% c | Scale the returning eigenvectors so that their | c | Euclidean norms are all one. LAPACK subroutine | @@ -783,22 +787,22 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | that the element of largest magnitude has | c | magnitude 1; | c %------------------------------------------------% -c +c iconj = 0 do 40 j=1, nconv c if ( workl(iheigi+j-1) .eq. zero ) then -c +c c %----------------------% c | real eigenvalue case | c %----------------------% -c +c temp = snrm2( ncv, workl(invsub+(j-1)*ldq), 1 ) - call sscal( ncv, one / temp, + call sscal( ncv, one / temp, & workl(invsub+(j-1)*ldq), 1 ) c else -c +c c %-------------------------------------------% c | Complex conjugate pair case. Note that | c | since the real and imaginary part of | @@ -808,15 +812,15 @@ subroutine sneupd(rvec , howmny, select, dr , di, c %-------------------------------------------% c if (iconj .eq. 0) then - temp = slapy2(snrm2(ncv, - & workl(invsub+(j-1)*ldq), + temp = slapy2(snrm2(ncv, + & workl(invsub+(j-1)*ldq), & 1), - & snrm2(ncv, + & snrm2(ncv, & workl(invsub+j*ldq), - & 1)) - call sscal(ncv, one/temp, + & 1)) + call sscal(ncv, one/temp, & workl(invsub+(j-1)*ldq), 1 ) - call sscal(ncv, one/temp, + call sscal(ncv, one/temp, & workl(invsub+j*ldq), 1 ) iconj = 1 else @@ -856,7 +860,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, call svout(logfil, ncv, workl(ihbds), ndigit, & '_neupd: Last row of the eigenvector matrix for T') if (msglvl .gt. 3) then - call smout(logfil, ncv, ncv, workl(invsub), ldq, + call smout(logfil, ncv, ncv, workl(invsub), ldq, & ndigit, '_neupd: The eigenvector matrix for T') end if end if @@ -872,32 +876,32 @@ subroutine sneupd(rvec , howmny, select, dr , di, c | associated with leading portion of T in the first NCONV | c | columns of workl(invsub,ldq). | c %---------------------------------------------------------% -c - call sgeqr2(ncv, nconv , workl(invsub), +c + call sgeqr2(ncv, nconv , workl(invsub), & ldq, workev, workev(ncv+1), & ierr) -c +c c %----------------------------------------------% -c | * Postmultiply Z by Q. | +c | * Postmultiply Z by Q. | c | * Postmultiply Z by R. | -c | The N by NCONV matrix Z is now contains the | +c | The N by NCONV matrix Z is now contains the | c | Ritz vectors associated with the Ritz values | c | in workl(iheigr) and workl(iheigi). | c %----------------------------------------------% -c +c call sorm2r('Right', 'Notranspose', n , & ncv , nconv , workl(invsub), & ldq , workev , z , & ldz , workd(n+1) , ierr) -c +c call strmm('Right' , 'Upper' , 'No transpose', & 'Non-unit', n , nconv , & one , workl(invsub), ldq , & z , ldz) -c +c end if -c - else +c + else c c %------------------------------------------------------% c | An approximate invariant subspace is not needed. | @@ -910,7 +914,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, call scopy(nconv, workl(ritzi), 1, workl(iheigi), 1) call scopy(nconv, workl(bounds), 1, workl(ihbds), 1) end if -c +c c %------------------------------------------------% c | Transform the Ritz values and possibly vectors | c | and corresponding error bounds of OP to those | @@ -919,26 +923,26 @@ subroutine sneupd(rvec , howmny, select, dr , di, c if (type .eq. 'REGULR') then c - if (rvec) - & call sscal(ncv, rnorm, workl(ihbds), 1) -c - else -c + if (rvec) + & call sscal(ncv, rnorm, workl(ihbds), 1) +c + else +c c %---------------------------------------% c | A spectral transformation was used. | c | * Determine the Ritz estimates of the | c | Ritz values in the original system. | c %---------------------------------------% -c +c if (type .eq. 'SHIFTI') then c - if (rvec) + if (rvec) & call sscal(ncv, rnorm, workl(ihbds), 1) c do 50 k=1, ncv - temp = slapy2( workl(iheigr+k-1), + temp = slapy2( workl(iheigr+k-1), & workl(iheigi+k-1) ) - workl(ihbds+k-1) = abs( workl(ihbds+k-1) ) + workl(ihbds+k-1) = abs( workl(ihbds+k-1) ) & / temp / temp 50 continue c @@ -953,26 +957,26 @@ subroutine sneupd(rvec , howmny, select, dr , di, 70 continue c end if -c +c c %-----------------------------------------------------------% c | * Transform the Ritz values back to the original system. | c | For TYPE = 'SHIFTI' the transformation is | c | lambda = 1/theta + sigma | c | For TYPE = 'REALPT' or 'IMAGPT' the user must from | -c | Rayleigh quotients or a projection. See remark 3 above.| +c | Rayleigh quotients or a projection. See remark 3 above.| c | NOTES: | c | *The Ritz vectors are not affected by the transformation. | c %-----------------------------------------------------------% -c - if (type .eq. 'SHIFTI') then +c + if (type .eq. 'SHIFTI') then c do 80 k=1, ncv - temp = slapy2( workl(iheigr+k-1), + temp = slapy2( workl(iheigr+k-1), & workl(iheigi+k-1) ) - workl(iheigr+k-1) = workl(iheigr+k-1)/temp/temp - & + sigmar + workl(iheigr+k-1) = workl(iheigr+k-1)/temp/temp + & + sigmar workl(iheigi+k-1) = -workl(iheigi+k-1)/temp/temp - & + sigmai + & + sigmai 80 continue c call scopy(nconv, workl(iheigr), 1, dr, 1) @@ -989,9 +993,9 @@ subroutine sneupd(rvec , howmny, select, dr , di, c if (type .eq. 'SHIFTI' .and. msglvl .gt. 1) then call svout(logfil, nconv, dr, ndigit, - & '_neupd: Untransformed real part of the Ritz valuess.') + & '_neupd: Untransformed real part of the Ritz values.') call svout (logfil, nconv, di, ndigit, - & '_neupd: Untransformed imag part of the Ritz valuess.') + & '_neupd: Untransformed imag part of the Ritz values.') call svout(logfil, nconv, workl(ihbds), ndigit, & '_neupd: Ritz estimates of untransformed Ritz values.') else if (type .eq. 'REGULR' .and. msglvl .gt. 1) then @@ -1002,7 +1006,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, call svout(logfil, nconv, workl(ihbds), ndigit, & '_neupd: Associated Ritz estimates.') end if -c +c c %-------------------------------------------------% c | Eigenvector Purification step. Formally perform | c | one of inverse subspace iteration. Only used | @@ -1024,19 +1028,22 @@ subroutine sneupd(rvec , howmny, select, dr , di, c iconj = 0 do 110 j=1, nconv - if (workl(iheigi+j-1) .eq. zero) then + if ((workl(iheigi+j-1) .eq. zero) .and. + & (workl(iheigr+j-1) .ne. zero)) then workev(j) = workl(invsub+(j-1)*ldq+ncv-1) / & workl(iheigr+j-1) else if (iconj .eq. 0) then temp = slapy2( workl(iheigr+j-1), workl(iheigi+j-1) ) - workev(j) = ( workl(invsub+(j-1)*ldq+ncv-1) * - & workl(iheigr+j-1) + - & workl(invsub+j*ldq+ncv-1) * - & workl(iheigi+j-1) ) / temp / temp - workev(j+1) = ( workl(invsub+j*ldq+ncv-1) * - & workl(iheigr+j-1) - - & workl(invsub+(j-1)*ldq+ncv-1) * - & workl(iheigi+j-1) ) / temp / temp + if (temp. ne. zero) then + workev(j) = ( workl(invsub+(j-1)*ldq+ncv-1) * + & workl(iheigr+j-1) + + & workl(invsub+j*ldq+ncv-1) * + & workl(iheigi+j-1) ) / temp / temp + workev(j+1) = ( workl(invsub+j*ldq+ncv-1) * + & workl(iheigr+j-1) - + & workl(invsub+(j-1)*ldq+ncv-1) * + & workl(iheigi+j-1) ) / temp / temp + end if iconj = 1 else iconj = 0 @@ -1055,7 +1062,7 @@ subroutine sneupd(rvec , howmny, select, dr , di, 9000 continue c return -c +c c %---------------% c | End of SNEUPD | c %---------------% diff --git a/Toolbox/arpack-src/sngets.f b/Toolbox/arpack-src/sngets.f index dd15a0b3e..7e48c0bb1 100644 --- a/Toolbox/arpack-src/sngets.f +++ b/Toolbox/arpack-src/sngets.f @@ -3,9 +3,9 @@ c c\Name: sngets c -c\Description: +c\Description: c Given the eigenvalues of the upper Hessenberg matrix H, -c computes the NP shifts AMU that are zeros of the polynomial of +c computes the NP shifts AMU that are zeros of the polynomial of c degree NP which filters out components of the unwanted eigenvectors c corresponding to the AMU's based on some given criteria. c @@ -42,12 +42,12 @@ c pairs together. c c RITZR, Real array of length KEV+NP. (INPUT/OUTPUT) -c RITZI On INPUT, RITZR and RITZI contain the real and imaginary +c RITZI On INPUT, RITZR and RITZI contain the real and imaginary c parts of the eigenvalues of H. c On OUTPUT, RITZR and RITZI are sorted so that the unwanted c eigenvalues are in the first NP locations and the wanted -c portion is in the last KEV locations. When exact shifts are -c selected, the unwanted part corresponds to the shifts to +c portion is in the last KEV locations. When exact shifts are +c selected, the unwanted part corresponds to the shifts to c be applied. Also, if ISHIFT .eq. 1, the unwanted eigenvalues c are further sorted so that the ones with largest Ritz values c are first. @@ -56,7 +56,7 @@ c Error bounds corresponding to the ordering in RITZ. c c SHIFTR, SHIFTI *** USE deprecated as of version 2.1. *** -c +c c c\EndDoc c @@ -76,13 +76,13 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: ngets.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\Remarks @@ -99,8 +99,8 @@ subroutine sngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -114,7 +114,7 @@ subroutine sngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c %-----------------% c Real - & bounds(kev+np), ritzr(kev+np), ritzi(kev+np), + & bounds(kev+np), ritzr(kev+np), ritzi(kev+np), & shiftr(1), shifti(1) c c %------------% @@ -135,7 +135,7 @@ subroutine sngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | External Subroutines | c %----------------------% c - external scopy, ssortc, second + external scopy, ssortc, arscnd c c %----------------------% c | Intrinsics Functions | @@ -151,10 +151,10 @@ subroutine sngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% -c - call second (t0) +c + call arscnd (t0) msglvl = mngets -c +c c %----------------------------------------------------% c | LM, SM, LR, SR, LI, SI case. | c | Sort the eigenvalues of H into the desired order | @@ -178,16 +178,16 @@ subroutine sngets ( ishift, which, kev, np, ritzr, ritzi, bounds, else if (which .eq. 'SI') then call ssortc ('SM', .true., kev+np, ritzr, ritzi, bounds) end if -c +c call ssortc (which, .true., kev+np, ritzr, ritzi, bounds) -c +c c %-------------------------------------------------------% c | Increase KEV by one if the ( ritzr(np),ritzi(np) ) | c | = ( ritzr(np+1),-ritzi(np+1) ) and ritz(np) .ne. zero | c | Accordingly decrease NP by one. In other words keep | c | complex conjugate pairs together. | c %-------------------------------------------------------% -c +c if ( ( ritzr(np+1) - ritzr(np) ) .eq. zero & .and. ( ritzi(np+1) + ritzi(np) ) .eq. zero ) then np = np - 1 @@ -195,7 +195,7 @@ subroutine sngets ( ishift, which, kev, np, ritzr, ritzi, bounds, end if c if ( ishift .eq. 1 ) then -c +c c %-------------------------------------------------------% c | Sort the unwanted Ritz values used as shifts so that | c | the ones with largest Ritz estimates are first | @@ -204,28 +204,28 @@ subroutine sngets ( ishift, which, kev, np, ritzr, ritzi, bounds, c | are applied in subroutine snapps. | c | Be careful and use 'SR' since we want to sort BOUNDS! | c %-------------------------------------------------------% -c +c call ssortc ( 'SR', .true., np, bounds, ritzr, ritzi ) end if -c - call second (t1) +c + call arscnd (t1) tngets = tngets + (t1 - t0) c if (msglvl .gt. 0) then - call ivout (logfil, 1, kev, ndigit, '_ngets: KEV is') - call ivout (logfil, 1, np, ndigit, '_ngets: NP is') + call ivout (logfil, 1, [kev], ndigit, '_ngets: KEV is') + call ivout (logfil, 1, [np], ndigit, '_ngets: NP is') call svout (logfil, kev+np, ritzr, ndigit, & '_ngets: Eigenvalues of current H matrix -- real part') call svout (logfil, kev+np, ritzi, ndigit, & '_ngets: Eigenvalues of current H matrix -- imag part') - call svout (logfil, kev+np, bounds, ndigit, + call svout (logfil, kev+np, bounds, ndigit, & '_ngets: Ritz estimates of the current KEV+NP Ritz values') end if -c +c return -c +c c %---------------% c | End of sngets | c %---------------% -c +c end diff --git a/Toolbox/arpack-src/ssaitr.f b/Toolbox/arpack-src/ssaitr.f index aacb9ceeb..a5df2c2ec 100644 --- a/Toolbox/arpack-src/ssaitr.f +++ b/Toolbox/arpack-src/ssaitr.f @@ -3,8 +3,8 @@ c c\Name: ssaitr c -c\Description: -c Reverse communication interface for applying NP additional steps to +c\Description: +c Reverse communication interface for applying NP additional steps to c a K step symmetric Arnoldi factorization. c c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T @@ -20,7 +20,7 @@ c c\Usage: c call ssaitr -c ( IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH, +c ( IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH, c IPNTR, WORKD, INFO ) c c\Arguments @@ -76,13 +76,13 @@ c On OUTPUT the B-norm of the updated residual r_{k+p}. c c V Real N by K+NP array. (INPUT/OUTPUT) -c On INPUT: V contains the Arnoldi vectors in the first K +c On INPUT: V contains the Arnoldi vectors in the first K c columns. c On OUTPUT: V contains the new NP Arnoldi vectors in the next c NP columns. The first K columns are unchanged. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Real (K+NP) by 2 array. (INPUT/OUTPUT) @@ -91,26 +91,26 @@ c and the main diagonal in the second column. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORK for +c Pointer to mark the starting locations in the WORK for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Real work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The calling program should not +c for reverse communication. The calling program should not c use WORKD as temporary workspace during the iteration !!!!!! c On INPUT, WORKD(1:N) = B*RESID where RESID is associated -c with the K step Arnoldi factorization. Used to save some -c computation at the first step. +c with the K step Arnoldi factorization. Used to save some +c computation at the first step. c On OUTPUT, WORKD(1:N) = B*RESID where RESID is associated c with the K+NP step Arnoldi factorization. c @@ -139,7 +139,7 @@ c saxpy Level 1 BLAS that computes a vector triad. c sscal Level 1 BLAS that scales a vector. c scopy Level 1 BLAS that copies one vector to another . -c sdot Level 1 BLAS that computes the scalar product of two vectors. +c sdot Level 1 BLAS that computes the scalar product of two vectors. c snrm2 Level 1 BLAS that computes the norm of a vector. c c\Author @@ -147,29 +147,29 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c xx/xx/93: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: saitr.F SID: 2.6 DATE OF SID: 8/28/96 RELEASE: 2 c c\Remarks c The algorithm implemented is: -c +c c restart = .false. -c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; +c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; c r_{k} contains the initial residual vector even for k = 0; -c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already +c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already c computed by the calling program. c c betaj = rnorm ; p_{k+1} = B*r_{k} ; c For j = k+1, ..., k+np Do c 1) if ( betaj < tol ) stop or restart depending on j. c if ( restart ) generate a new starting vector. -c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; +c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; c p_{j} = p_{j}/betaj c 3) r_{j} = OP*v_{j} where OP is defined as in ssaupd c For shift-invert mode p_{j} = B*v_{j} is already available. @@ -184,7 +184,7 @@ c 5) Re-orthogonalization step: c s = V_{j}'*B*r_{j} c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} || -c alphaj = alphaj + s_{j}; +c alphaj = alphaj + s_{j}; c 6) Iterative refinement step: c If (rnorm1 > 0.717*rnorm) then c rnorm = rnorm1 @@ -194,7 +194,7 @@ c If this is the first time in step 6), go to 5) c Else r_{j} lies in the span of V_{j} numerically. c Set r_{j} = 0 and rnorm = 0; go to 1) -c EndIf +c EndIf c End Do c c\EndLib @@ -202,15 +202,15 @@ c----------------------------------------------------------------------- c subroutine ssaitr - & (ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh, + & (ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh, & ipntr, workd, info) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -242,7 +242,7 @@ subroutine ssaitr c %---------------% c logical first, orth1, orth2, rstart, step3, step4 - integer i, ierr, ipj, irj, ivj, iter, itry, j, msglvl, + integer i, ierr, ipj, irj, ivj, iter, itry, j, msglvl, & infol, jj Real & rnorm1, wnorm, safmin, temp1 @@ -251,7 +251,7 @@ subroutine ssaitr & rnorm1, safmin, wnorm c c %-----------------------% -c | Local Array Arguments | +c | Local Array Arguments | c %-----------------------% c Real @@ -262,7 +262,7 @@ subroutine ssaitr c %----------------------% c external saxpy, scopy, sscal, sgemv, sgetv0, svout, smout, - & slascl, ivout, second + & slascl, ivout, arscnd c c %--------------------% c | External Functions | @@ -294,15 +294,15 @@ subroutine ssaitr end if c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msaitr -c +c c %------------------------------% c | Initial call to this routine | c %------------------------------% @@ -313,14 +313,14 @@ subroutine ssaitr rstart = .false. orth1 = .false. orth2 = .false. -c +c c %--------------------------------% c | Pointer to the current step of | c | the factorization to build | c %--------------------------------% c j = k + 1 -c +c c %------------------------------------------% c | Pointers used for reverse communication | c | when using WORKD. | @@ -330,7 +330,7 @@ subroutine ssaitr irj = ipj + n ivj = irj + n end if -c +c c %-------------------------------------------------% c | When in reverse communication mode one of: | c | STEP3, STEP4, ORTH1, ORTH2, RSTART | @@ -353,7 +353,7 @@ subroutine ssaitr c %------------------------------% c | Else this is the first step. | c %------------------------------% -c +c c %--------------------------------------------------------------% c | | c | A R N O L D I I T E R A T I O N L O O P | @@ -364,15 +364,15 @@ subroutine ssaitr 1000 continue c if (msglvl .gt. 2) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_saitr: generating Arnoldi vector no.') - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_saitr: B-norm of the current residual =') end if -c +c c %---------------------------------------------------------% -c | Check for exact zero. Equivalent to determing whether a | -c | j-step Arnoldi factorization is present. | +c | Check for exact zero. Equivalent to determining whether | +c | a j-step Arnoldi factorization is present. | c %---------------------------------------------------------% c if (rnorm .gt. zero) go to 40 @@ -384,10 +384,10 @@ subroutine ssaitr c %---------------------------------------------------% c if (msglvl .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_saitr: ****** restart at step ******') end if -c +c c %---------------------------------------------% c | ITRY is the loop variable that controls the | c | maximum amount of times that a restart is | @@ -406,7 +406,7 @@ subroutine ssaitr c | RSTART = .true. flow returns here. | c %--------------------------------------% c - call sgetv0 (ido, bmat, itry, .false., n, j, v, ldv, + call sgetv0 (ido, bmat, itry, .false., n, j, v, ldv, & resid, rnorm, ipntr, workd, ierr) if (ido .ne. 99) go to 9000 if (ierr .lt. 0) then @@ -420,12 +420,12 @@ subroutine ssaitr c %------------------------------------------------% c info = j - 1 - call second (t1) + call arscnd (t1) tsaitr = tsaitr + (t1 - t0) ido = 99 go to 9000 end if -c +c 40 continue c c %---------------------------------------------------------% @@ -447,12 +447,12 @@ subroutine ssaitr c | use LAPACK routine SLASCL | c %-----------------------------------------% c - call slascl ('General', i, i, rnorm, one, n, 1, + call slascl ('General', i, i, rnorm, one, n, 1, & v(1,j), n, infol) - call slascl ('General', i, i, rnorm, one, n, 1, + call slascl ('General', i, i, rnorm, one, n, 1, & workd(ipj), n, infol) end if -c +c c %------------------------------------------------------% c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | c | Note that this is not quite yet r_{j}. See STEP 4 | @@ -460,28 +460,28 @@ subroutine ssaitr c step3 = .true. nopx = nopx + 1 - call second (t2) + call arscnd (t2) call scopy (n, v(1,j), 1, workd(ivj), 1) ipntr(1) = ivj ipntr(2) = irj ipntr(3) = ipj ido = 1 -c +c c %-----------------------------------% c | Exit in order to compute OP*v_{j} | c %-----------------------------------% -c +c go to 9000 50 continue -c +c c %-----------------------------------% c | Back from reverse communication; | c | WORKD(IRJ:IRJ+N-1) := OP*v_{j}. | c %-----------------------------------% c - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) -c +c step3 = .false. c c %------------------------------------------% @@ -489,7 +489,7 @@ subroutine ssaitr c %------------------------------------------% c call scopy (n, workd(irj), 1, resid, 1) -c +c c %-------------------------------------------% c | STEP 4: Finish extending the symmetric | c | Arnoldi to length j. If MODE = 2 | @@ -500,33 +500,33 @@ subroutine ssaitr c %-------------------------------------------% c if (mode .eq. 2) go to 65 - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 step4 = .true. ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-------------------------------------% c | Exit in order to compute B*OP*v_{j} | c %-------------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy(n, resid, 1 , workd(ipj), 1) end if 60 continue -c +c c %-----------------------------------% c | Back from reverse communication; | c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j}. | c %-----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) - end if + end if c step4 = .false. c @@ -545,7 +545,7 @@ subroutine ssaitr c wnorm = sdot (n, resid, 1, workd(ivj), 1) wnorm = sqrt(abs(wnorm)) - else if (bmat .eq. 'G') then + else if (bmat .eq. 'G') then wnorm = sdot (n, resid, 1, workd(ipj), 1) wnorm = sqrt(abs(wnorm)) else if (bmat .eq. 'I') then @@ -567,19 +567,19 @@ subroutine ssaitr c %------------------------------------------% c if (mode .ne. 2 ) then - call sgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero, + call sgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero, & workd(irj), 1) else if (mode .eq. 2) then - call sgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero, + call sgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero, & workd(irj), 1) end if c c %--------------------------------------% c | Orthgonalize r_{j} against V_{j}. | -c | RESID contains OP*v_{j}. See STEP 3. | +c | RESID contains OP*v_{j}. See STEP 3. | c %--------------------------------------% c - call sgemv('N', n, j, -one, v, ldv, workd(irj), 1, one, + call sgemv('N', n, j, -one, v, ldv, workd(irj), 1, one, & resid, 1) c c %--------------------------------------% @@ -592,46 +592,46 @@ subroutine ssaitr else h(j,1) = rnorm end if - call second (t4) -c + call arscnd (t4) +c orth1 = .true. iter = 0 -c - call second (t2) +c + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call scopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*r_{j} | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd(ipj), 1) end if 70 continue -c +c c %---------------------------------------------------% c | Back from reverse communication if ORTH1 = .true. | c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c orth1 = .false. c c %------------------------------% c | Compute the B-norm of r_{j}. | c %------------------------------% c - if (bmat .eq. 'G') then + if (bmat .eq. 'G') then rnorm = sdot (n, resid, 1, workd(ipj), 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then @@ -655,7 +655,7 @@ subroutine ssaitr c if (rnorm .gt. 0.717*wnorm) go to 100 nrorth = nrorth + 1 -c +c c %---------------------------------------------------% c | Enter the Iterative refinement phase. If further | c | refinement is necessary, loop back here. The loop | @@ -668,7 +668,7 @@ subroutine ssaitr if (msglvl .gt. 2) then xtemp(1) = wnorm xtemp(2) = rnorm - call svout (logfil, 2, xtemp, ndigit, + call svout (logfil, 2, xtemp, ndigit, & '_saitr: re-orthonalization ; wnorm and rnorm are') end if c @@ -677,7 +677,7 @@ subroutine ssaitr c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | c %----------------------------------------------------% c - call sgemv ('T', n, j, one, v, ldv, workd(ipj), 1, + call sgemv ('T', n, j, one, v, ldv, workd(ipj), 1, & zero, workd(irj), 1) c c %----------------------------------------------% @@ -688,26 +688,26 @@ subroutine ssaitr c | H(j,j) is updated. | c %----------------------------------------------% c - call sgemv ('N', n, j, -one, v, ldv, workd(irj), 1, + call sgemv ('N', n, j, -one, v, ldv, workd(irj), 1, & one, resid, 1) c if (j .eq. 1 .or. rstart) h(j,1) = zero h(j,2) = h(j,2) + workd(irj + j - 1) -c +c orth2 = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call scopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-----------------------------------% c | Exit in order to compute B*r_{j}. | c | r_{j} is the corrected residual. | c %-----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd(ipj), 1) @@ -719,15 +719,15 @@ subroutine ssaitr c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if c c %-----------------------------------------------------% c | Compute the B-norm of the corrected residual r_{j}. | c %-----------------------------------------------------% -c - if (bmat .eq. 'G') then +c + if (bmat .eq. 'G') then rnorm1 = sdot (n, resid, 1, workd(ipj), 1) rnorm1 = sqrt(abs(rnorm1)) else if (bmat .eq. 'I') then @@ -735,7 +735,7 @@ subroutine ssaitr end if c if (msglvl .gt. 0 .and. iter .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_saitr: Iterative refinement for Arnoldi residual') if (msglvl .gt. 2) then xtemp(1) = rnorm @@ -744,7 +744,7 @@ subroutine ssaitr & '_saitr: iterative refinement ; rnorm and rnorm1 are') end if end if -c +c c %-----------------------------------------% c | Determine if we need to perform another | c | step of re-orthogonalization. | @@ -757,7 +757,7 @@ subroutine ssaitr c %--------------------------------% c rnorm = rnorm1 -c +c else c c %-------------------------------------------% @@ -779,7 +779,7 @@ subroutine ssaitr 95 continue rnorm = zero end if -c +c c %----------------------------------------------% c | Branch here directly if iterative refinement | c | wasn't necessary or after at most NITER_REF | @@ -787,13 +787,13 @@ subroutine ssaitr c %----------------------------------------------% c 100 continue -c +c rstart = .false. orth2 = .false. -c - call second (t5) +c + call arscnd (t5) titref = titref + (t5 - t4) -c +c c %----------------------------------------------------------% c | Make sure the last off-diagonal element is non negative | c | If not perform a similarity transformation on H(1:j,1:j) | @@ -802,28 +802,28 @@ subroutine ssaitr c if (h(j,1) .lt. zero) then h(j,1) = -h(j,1) - if ( j .lt. k+np) then + if ( j .lt. k+np) then call sscal(n, -one, v(1,j+1), 1) else call sscal(n, -one, resid, 1) end if end if -c +c c %------------------------------------% c | STEP 6: Update j = j+1; Continue | c %------------------------------------% c j = j + 1 if (j .gt. k+np) then - call second (t1) + call arscnd (t1) tsaitr = tsaitr + (t1 - t0) ido = 99 c if (msglvl .gt. 1) then - call svout (logfil, k+np, h(1,2), ndigit, + call svout (logfil, k+np, h(1,2), ndigit, & '_saitr: main diagonal of matrix H of step K+NP.') if (k+np .gt. 1) then - call svout (logfil, k+np-1, h(2,1), ndigit, + call svout (logfil, k+np-1, h(2,1), ndigit, & '_saitr: sub diagonal of matrix H of step K+NP.') end if end if @@ -836,7 +836,7 @@ subroutine ssaitr c %--------------------------------------------------------% c go to 1000 -c +c c %---------------------------------------------------------------% c | | c | E N D O F M A I N I T E R A T I O N L O O P | diff --git a/Toolbox/arpack-src/ssapps.f b/Toolbox/arpack-src/ssapps.f index 5269a8591..77bd9d52c 100644 --- a/Toolbox/arpack-src/ssapps.f +++ b/Toolbox/arpack-src/ssapps.f @@ -12,8 +12,8 @@ c c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q c -c where Q is an orthogonal matrix of order KEV+NP. Q is the product of -c rotations resulting from the NP bulge chasing sweeps. The updated Arnoldi +c where Q is an orthogonal matrix of order KEV+NP. Q is the product of +c rotations resulting from the NP bulge chasing sweeps. The updated Arnoldi c factorization becomes: c c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T. @@ -49,7 +49,7 @@ c INPUT: H contains the symmetric tridiagonal matrix of the c Arnoldi factorization with the subdiagonal in the 1st column c starting at H(2,1) and the main diagonal in the 2nd column. -c OUTPUT: H contains the updated tridiagonal matrix in the +c OUTPUT: H contains the updated tridiagonal matrix in the c KEV leading submatrix. c c LDH Integer. (INPUT) @@ -85,13 +85,13 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c ivout ARPACK utility routine that prints integers. +c arscnd ARPACK utility routine for timing. c svout ARPACK utility routine that prints vectors. c slamch LAPACK routine that determines machine constants. c slartg LAPACK Givens rotation construction routine. @@ -107,19 +107,19 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c 12/16/93: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sapps.F SID: 2.6 DATE OF SID: 3/28/97 RELEASE: 2 c c\Remarks c 1. In this version, each shift is applied to all the subblocks of -c the tridiagonal matrix H and not just to the submatrix that it -c comes from. This routine assumes that the subdiagonal elements +c the tridiagonal matrix H and not just to the submatrix that it +c comes from. This routine assumes that the subdiagonal elements c of H that are stored in h(1:kev+np,1) are nonegative upon input c and enforce this condition upon output. This version incorporates c deflation. See code for documentation. @@ -135,8 +135,8 @@ subroutine ssapps c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -149,7 +149,7 @@ subroutine ssapps c %-----------------% c Real - & h(ldh,2), q(ldq,kev+np), resid(n), shift(np), + & h(ldh,2), q(ldq,kev+np), resid(n), shift(np), & v(ldv,kev+np), workd(2*n) c c %------------% @@ -175,8 +175,8 @@ subroutine ssapps c | External Subroutines | c %----------------------% c - external saxpy, scopy, sscal, slacpy, slartg, slaset, svout, - & ivout, second, sgemv + external saxpy, scopy, sscal, slacpy, slartg, slaset, svout, + & ivout, arscnd, sgemv c c %--------------------% c | External Functions | @@ -193,7 +193,7 @@ subroutine ssapps intrinsic abs c c %----------------% -c | Data statments | +c | Data statements | c %----------------% c data first / .true. / @@ -213,11 +213,11 @@ subroutine ssapps c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msapps -c - kplusp = kev + np -c +c + kplusp = kev + np +c c %----------------------------------------------% c | Initialize Q to the identity matrix of order | c | kplusp used to accumulate the rotations. | @@ -230,7 +230,7 @@ subroutine ssapps c %----------------------------------------------% c if (np .eq. 0) go to 9000 -c +c c %----------------------------------------------------------% c | Apply the np shifts implicitly. Apply each shift to the | c | whole matrix and not just to the submatrix from which it | @@ -238,7 +238,7 @@ subroutine ssapps c %----------------------------------------------------------% c do 90 jj = 1, np -c +c istart = itop c c %----------------------------------------------------------% @@ -261,11 +261,11 @@ subroutine ssapps big = abs(h(i,2)) + abs(h(i+1,2)) if (h(i+1,1) .le. epsmch*big) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_sapps: deflation at row/column no.') - call ivout (logfil, 1, jj, ndigit, - & '_sapps: occured before shift number.') - call svout (logfil, 1, h(i+1,1), ndigit, + call ivout (logfil, 1, [jj], ndigit, + & '_sapps: occurred before shift number.') + call svout (logfil, 1, h(i+1,1), ndigit, & '_sapps: the corresponding off diagonal element') end if h(i+1,1) = zero @@ -277,7 +277,7 @@ subroutine ssapps 40 continue c if (istart .lt. iend) then -c +c c %--------------------------------------------------------% c | Construct the plane rotation G'(istart,istart+1,theta) | c | that attempts to drive h(istart+1,1) to zero. | @@ -286,7 +286,7 @@ subroutine ssapps f = h(istart,2) - shift(jj) g = h(istart+1,1) call slartg (f, g, c, s, r) -c +c c %-------------------------------------------------------% c | Apply rotation to the left and right of H; | c | H <- G' * H * G, where G = G(istart,istart+1,theta). | @@ -296,11 +296,11 @@ subroutine ssapps a1 = c*h(istart,2) + s*h(istart+1,1) a2 = c*h(istart+1,1) + s*h(istart+1,2) a4 = c*h(istart+1,2) - s*h(istart+1,1) - a3 = c*h(istart+1,1) - s*h(istart,2) + a3 = c*h(istart+1,1) - s*h(istart,2) h(istart,2) = c*a1 + s*a2 h(istart+1,2) = c*a4 - s*a3 h(istart+1,1) = c*a3 + s*a4 -c +c c %----------------------------------------------------% c | Accumulate the rotation in the matrix Q; Q <- Q*G | c %----------------------------------------------------% @@ -323,7 +323,7 @@ subroutine ssapps c %----------------------------------------------% c do 70 i = istart+1, iend-1 -c +c c %----------------------------------------------% c | Construct the plane rotation G'(i,i+1,theta) | c | that zeros the i-th bulge that was created | @@ -351,23 +351,23 @@ subroutine ssapps c = -c s = -s end if -c +c c %--------------------------------------------% c | Apply rotation to the left and right of H; | c | H <- G * H * G', where G = G(i,i+1,theta) | c %--------------------------------------------% c h(i,1) = r -c +c a1 = c*h(i,2) + s*h(i+1,1) a2 = c*h(i+1,1) + s*h(i+1,2) a3 = c*h(i+1,1) - s*h(i,2) a4 = c*h(i+1,2) - s*h(i+1,1) -c +c h(i,2) = c*a1 + s*a2 h(i+1,2) = c*a4 - s*a3 h(i+1,1) = c*a3 + s*a4 -c +c c %----------------------------------------------------% c | Accumulate the rotation in the matrix Q; Q <- Q*G | c %----------------------------------------------------% @@ -425,16 +425,16 @@ subroutine ssapps c %------------------------------------------% c | All shifts have been applied. Check for | c | more possible deflation that might occur | -c | after the last shift is applied. | +c | after the last shift is applied. | c %------------------------------------------% c do 100 i = itop, kplusp-1 big = abs(h(i,2)) + abs(h(i+1,2)) if (h(i+1,1) .le. epsmch*big) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_sapps: deflation at row/column no.') - call svout (logfil, 1, h(i+1,1), ndigit, + call svout (logfil, 1, h(i+1,1), ndigit, & '_sapps: the corresponding off diagonal element') end if h(i+1,1) = zero @@ -447,13 +447,13 @@ subroutine ssapps c | This is not necessary if h(kev+1,1) = 0. | c %-------------------------------------------------% c - if ( h(kev+1,1) .gt. zero ) + if ( h(kev+1,1) .gt. zero ) & call sgemv ('N', n, kplusp, one, v, ldv, & q(1,kev+1), 1, zero, workd(n+1), 1) -c +c c %-------------------------------------------------------% c | Compute column 1 to kev of (V*Q) in backward order | -c | taking advantage that Q is an upper triangular matrix | +c | taking advantage that Q is an upper triangular matrix | c | with lower bandwidth np. | c | Place results in v(:,kplusp-kev:kplusp) temporarily. | c %-------------------------------------------------------% @@ -469,15 +469,15 @@ subroutine ssapps c %-------------------------------------------------% c call slacpy ('All', n, kev, v(1,np+1), ldv, v, ldv) -c +c c %--------------------------------------------% c | Copy the (kev+1)-st column of (V*Q) in the | c | appropriate place if h(kev+1,1) .ne. zero. | c %--------------------------------------------% c - if ( h(kev+1,1) .gt. zero ) + if ( h(kev+1,1) .gt. zero ) & call scopy (n, workd(n+1), 1, v(1,kev+1), 1) -c +c c %-------------------------------------% c | Update the residual vector: | c | r <- sigmak*r + betak*v(:,kev+1) | @@ -487,26 +487,26 @@ subroutine ssapps c %-------------------------------------% c call sscal (n, q(kplusp,kev), resid, 1) - if (h(kev+1,1) .gt. zero) + if (h(kev+1,1) .gt. zero) & call saxpy (n, h(kev+1,1), v(1,kev+1), 1, resid, 1) c if (msglvl .gt. 1) then - call svout (logfil, 1, q(kplusp,kev), ndigit, + call svout (logfil, 1, q(kplusp,kev), ndigit, & '_sapps: sigmak of the updated residual vector') - call svout (logfil, 1, h(kev+1,1), ndigit, + call svout (logfil, 1, h(kev+1,1), ndigit, & '_sapps: betak of the updated residual vector') - call svout (logfil, kev, h(1,2), ndigit, + call svout (logfil, kev, h(1,2), ndigit, & '_sapps: updated main diagonal of H for next iteration') if (kev .gt. 1) then - call svout (logfil, kev-1, h(2,1), ndigit, + call svout (logfil, kev-1, h(2,1), ndigit, & '_sapps: updated sub diagonal of H for next iteration') end if end if c - call second (t1) + call arscnd (t1) tsapps = tsapps + (t1 - t0) -c - 9000 continue +c + 9000 continue return c c %---------------% diff --git a/Toolbox/arpack-src/ssaup2.f b/Toolbox/arpack-src/ssaup2.f index bf346297c..504f28fb0 100644 --- a/Toolbox/arpack-src/ssaup2.f +++ b/Toolbox/arpack-src/ssaup2.f @@ -3,35 +3,35 @@ c c\Name: ssaup2 c -c\Description: +c\Description: c Intermediate level interface called by ssaupd. c c\Usage: -c call ssaup2 +c call ssaup2 c ( IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD, -c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, Q, LDQ, WORKL, +c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, Q, LDQ, WORKL, c IPNTR, WORKD, INFO ) c c\Arguments c c IDO, BMAT, N, WHICH, NEV, TOL, RESID: same as defined in ssaupd. c MODE, ISHIFT, MXITER: see the definition of IPARAM in ssaupd. -c +c c NP Integer. (INPUT/OUTPUT) -c Contains the number of implicit shifts to apply during -c each Arnoldi/Lanczos iteration. -c If ISHIFT=1, NP is adjusted dynamically at each iteration +c Contains the number of implicit shifts to apply during +c each Arnoldi/Lanczos iteration. +c If ISHIFT=1, NP is adjusted dynamically at each iteration c to accelerate convergence and prevent stagnation. -c This is also roughly equal to the number of matrix-vector +c This is also roughly equal to the number of matrix-vector c products (involving the operator OP) per Arnoldi iteration. c The logic for adjusting is contained within the current c subroutine. c If ISHIFT=0, NP is the number of shifts the user needs -c to provide via reverse comunication. 0 < NP < NCV-NEV. +c to provide via reverse communication. 0 < NP < NCV-NEV. c NP may be less than NCV-NEV since a leading block of the current c upper Tridiagonal matrix has split off and contains "unwanted" c Ritz values. -c Upon termination of the IRA iteration, NP contains the number +c Upon termination of the IRA iteration, NP contains the number c of "converged" wanted Ritz values. c c IUPD Integer. (INPUT) @@ -42,18 +42,18 @@ c The Lanczos basis vectors. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Real (NEV+NP) by 2 array. (OUTPUT) c H is used to store the generated symmetric tridiagonal matrix -c The subdiagonal is stored in the first column of H starting +c The subdiagonal is stored in the first column of H starting c at H(2,1). The main diagonal is stored in the second column -c of H starting at H(1,2). If ssaup2 converges store the +c of H starting at H(1,2). If ssaup2 converges store the c B-norm of the final residual vector in H(1,1). c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c RITZ Real array of length NEV+NP. (OUTPUT) @@ -63,33 +63,33 @@ c BOUNDS(1:NEV) contain the error bounds corresponding to RITZ. c c Q Real (NEV+NP) by (NEV+NP) array. (WORKSPACE) -c Private (replicated) work array used to accumulate the +c Private (replicated) work array used to accumulate the c rotation in the shift application step. c c LDQ Integer. (INPUT) c Leading dimension of Q exactly as declared in the calling c program. -c +c c WORKL Real array of length at least 3*(NEV+NP). (INPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on -c the front end. It is used in the computation of the +c the front end. It is used in the computation of the c tridiagonal eigenvalue problem, the calculation and c application of the shifts and convergence checking. c If ISHIFT .EQ. O and IDO .EQ. 3, the first NP locations -c of WORKL are used in reverse communication to hold the user +c of WORKL are used in reverse communication to hold the user c supplied shifts. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORKD for +c Pointer to mark the starting locations in the WORKD for c vectors used by the Lanczos iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in one of +c IPNTR(3): pointer to the vector B * X when used in one of c the spectral transformation modes. X is the current c operand. c ------------------------------------------------------------- -c +c c WORKD Real work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Lanczos iteration c for reverse communication. The user should not use WORKD @@ -102,9 +102,9 @@ c possibly from a previous run. c Error flag on output. c = 0: Normal return. -c = 1: All possible eigenvalues of OP has been found. +c = 1: All possible eigenvalues of OP has been found. c NP returns the size of the invariant subspace -c spanning the operator OP. +c spanning the operator OP. c = 2: No shifts could be applied. c = -8: Error return from trid. eigenvalue calculation; c This should never happen. @@ -122,7 +122,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall, @@ -132,15 +132,15 @@ c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to c Implement the Spectral Transformation", Math. Comp., 48 (1987), c pp 663-673. -c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos -c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", +c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos +c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", c SIAM J. Matr. Anal. Apps., January (1993). c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines c for Updating the QR decomposition", ACM TOMS, December 1990, c Volume 16 Number 4, pp 369-377. c c\Routines called: -c sgetv0 ARPACK initial vector generation routine. +c sgetv0 ARPACK initial vector generation routine. c ssaitr ARPACK Lanczos factorization routine. c ssapps ARPACK application of implicit shifts routine. c ssconv ARPACK convergence of Ritz values routine. @@ -148,11 +148,11 @@ c ssgets ARPACK reorder Ritz values and error bounds routine. c ssortr ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c svout ARPACK utility routine that prints vectors. c slamch LAPACK routine that determines machine constants. c scopy Level 1 BLAS that copies one vector to another. -c sdot Level 1 BLAS that computes the scalar product of two vectors. +c sdot Level 1 BLAS that computes the scalar product of two vectors. c snrm2 Level 1 BLAS that computes the norm of a vector. c sscal Level 1 BLAS that scales a vector. c sswap Level 1 BLAS that swaps two vectors. @@ -162,14 +162,14 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c 12/15/93: Version ' 2.4' c xx/xx/95: Version ' 2.4'. (R.B. Lehoucq) c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: saup2.F SID: 2.7 DATE OF SID: 5/19/98 RELEASE: 2 c c\EndLib @@ -177,16 +177,16 @@ c----------------------------------------------------------------------- c subroutine ssaup2 - & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, + & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, + & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, & q, ldq, workl, ipntr, workd, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -204,8 +204,8 @@ subroutine ssaup2 c integer ipntr(3) Real - & bounds(nev+np), h(ldh,2), q(ldq,nev+np), resid(n), - & ritz(nev+np), v(ldv,nev+np), workd(3*n), + & bounds(nev+np), h(ldh,2), q(ldq,nev+np), resid(n), + & ritz(nev+np), v(ldv,nev+np), workd(3*n), & workl(3*(nev+np)) c c %------------% @@ -222,8 +222,8 @@ subroutine ssaup2 c character wprime*2 logical cnorm, getv0, initv, update, ushift - integer ierr, iter, j, kplusp, msglvl, nconv, nevbef, nev0, - & np0, nptemp, nevd2, nevm2, kp(3) + integer ierr, iter, j, kplusp, msglvl, nconv, nevbef, nev0, + & np0, nptemp, nevd2, nevm2, kp(3) Real & rnorm, temp, eps23 save cnorm, getv0, initv, update, ushift, @@ -234,8 +234,8 @@ subroutine ssaup2 c | External Subroutines | c %----------------------% c - external scopy, sgetv0, ssaitr, sscal, ssconv, sseigt, ssgets, - & ssapps, ssortr, svout, ivout, second, sswap + external scopy, sgetv0, ssaitr, sscal, ssconv, sseigt, ssgets, + & ssapps, ssortr, svout, ivout, arscnd, sswap c c %--------------------% c | External Functions | @@ -256,13 +256,13 @@ subroutine ssaup2 c %-----------------------% c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msaup2 c c %---------------------------------% @@ -292,7 +292,7 @@ subroutine ssaup2 kplusp = nev0 + np0 nconv = 0 iter = 0 -c +c c %--------------------------------------------% c | Set flags for computing the first NEV steps | c | of the Lanczos factorization. | @@ -315,7 +315,7 @@ subroutine ssaup2 initv = .false. end if end if -c +c c %---------------------------------------------% c | Get a possibly random starting vector and | c | force it into the range of the operator OP. | @@ -332,7 +332,7 @@ subroutine ssaup2 if (rnorm .eq. zero) then c c %-----------------------------------------% -c | The initial vector is zero. Error exit. | +c | The initial vector is zero. Error exit. | c %-----------------------------------------% c info = -9 @@ -341,7 +341,7 @@ subroutine ssaup2 getv0 = .false. ido = 0 end if -c +c c %------------------------------------------------------------% c | Back from reverse communication: continue with update step | c %------------------------------------------------------------% @@ -360,14 +360,14 @@ subroutine ssaup2 c %-------------------------------------% c if (cnorm) go to 100 -c +c c %----------------------------------------------------------% c | Compute the first NEV steps of the Lanczos factorization | c %----------------------------------------------------------% c - call ssaitr (ido, bmat, n, 0, nev0, mode, resid, rnorm, v, ldv, + call ssaitr (ido, bmat, n, 0, nev0, mode, resid, rnorm, v, ldv, & h, ldh, ipntr, workd, info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -388,7 +388,7 @@ subroutine ssaup2 info = -9999 go to 1200 end if -c +c c %--------------------------------------------------------------% c | | c | M A I N LANCZOS I T E R A T I O N L O O P | @@ -396,22 +396,22 @@ subroutine ssaup2 c | factorization in place. | c | | c %--------------------------------------------------------------% -c +c 1000 continue c iter = iter + 1 c if (msglvl .gt. 0) then - call ivout (logfil, 1, iter, ndigit, + call ivout (logfil, 1, [iter], ndigit, & '_saup2: **** Start of major iteration number ****') end if if (msglvl .gt. 1) then - call ivout (logfil, 1, nev, ndigit, + call ivout (logfil, 1, [nev], ndigit, & '_saup2: The length of the current Lanczos factorization') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_saup2: Extend the Lanczos factorization by') end if -c +c c %------------------------------------------------------------% c | Compute NP additional steps of the Lanczos factorization. | c %------------------------------------------------------------% @@ -420,9 +420,9 @@ subroutine ssaup2 20 continue update = .true. c - call ssaitr (ido, bmat, n, nev, np, mode, resid, rnorm, v, + call ssaitr (ido, bmat, n, nev, np, mode, resid, rnorm, v, & ldv, h, ldh, ipntr, workd, info) -c +c c %---------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP and possibly B | @@ -434,7 +434,7 @@ subroutine ssaup2 c c %-----------------------------------------------------% c | ssaitr was unable to build an Lanczos factorization | -c | of length NEV0+NP0. INFO is returned with the size | +c | of length NEV0+NP0. INFO is returned with the size | c | of the factorization built. Exit main loop. | c %-----------------------------------------------------% c @@ -446,10 +446,10 @@ subroutine ssaup2 update = .false. c if (msglvl .gt. 1) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_saup2: Current B-norm of residual for factorization') end if -c +c c %--------------------------------------------------------% c | Compute the eigenvalues and corresponding error bounds | c | of the current symmetric tridiagonal matrix. | @@ -483,7 +483,7 @@ subroutine ssaup2 nev = nev0 np = np0 call ssgets (ishift, which, nev, np, ritz, bounds, workl) -c +c c %-------------------% c | Convergence test. | c %-------------------% @@ -520,11 +520,11 @@ subroutine ssaup2 nev = nev + 1 end if 30 continue -c - if ( (nconv .ge. nev0) .or. +c + if ( (nconv .ge. nev0) .or. & (iter .gt. mxiter) .or. & (np .eq. 0) ) then -c +c c %------------------------------------------------% c | Prepare to exit. Put the converged Ritz values | c | and corresponding bounds in RITZ(1:NCONV) and | @@ -547,7 +547,7 @@ subroutine ssaup2 wprime = 'SA' call ssortr (wprime, .true., kplusp, ritz, bounds) nevd2 = nev0 / 2 - nevm2 = nev0 - nevd2 + nevm2 = nev0 - nevd2 if ( nev .gt. 1 ) then call sswap ( min(nevd2,np), ritz(nevm2+1), 1, & ritz( max(kplusp-nevd2+1,kplusp-np+1) ), 1) @@ -587,7 +587,7 @@ subroutine ssaup2 c c %----------------------------------------------------% c | Sort the Ritz values according to the scaled Ritz | -c | esitmates. This will push all the converged ones | +c | estimates. This will push all the converged ones | c | towards the front of ritzr, ritzi, bounds | c | (in the case when NCONV < NEV.) | c %----------------------------------------------------% @@ -651,13 +651,13 @@ subroutine ssaup2 end if c c %------------------------------------% -c | Max iterations have been exceeded. | +c | Max iterations have been exceeded. | c %------------------------------------% c if (iter .gt. mxiter .and. nconv .lt. nev) info = 1 c c %---------------------% -c | No shifts to apply. | +c | No shifts to apply. | c %---------------------% c if (np .eq. 0 .and. nconv .lt. nev0) info = 2 @@ -681,20 +681,20 @@ subroutine ssaup2 nev = 2 end if np = kplusp - nev -c +c c %---------------------------------------% c | If the size of NEV was just increased | c | resort the eigenvalues. | c %---------------------------------------% -c - if (nevbef .lt. nev) +c + if (nevbef .lt. nev) & call ssgets (ishift, which, nev, np, ritz, bounds, & workl) c end if c if (msglvl .gt. 0) then - call ivout (logfil, 1, nconv, ndigit, + call ivout (logfil, 1, [nconv], ndigit, & '_saup2: no. of "converged" Ritz values at this iter.') if (msglvl .gt. 1) then kp(1) = nev @@ -708,7 +708,7 @@ subroutine ssaup2 end if end if -c +c if (ishift .eq. 0) then c c %-----------------------------------------------------% @@ -731,8 +731,8 @@ subroutine ssaup2 c %------------------------------------% c ushift = .false. -c -c +c +c c %---------------------------------------------------------% c | Move the NP shifts to the first NP locations of RITZ to | c | free up WORKL. This is for the non-exact shift case; | @@ -742,7 +742,7 @@ subroutine ssaup2 if (ishift .eq. 0) call scopy (np, workl, 1, ritz, 1) c if (msglvl .gt. 2) then - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_saup2: The number of shifts to apply ') call svout (logfil, np, workl, ndigit, & '_saup2: shifts selected') @@ -751,7 +751,7 @@ subroutine ssaup2 & '_saup2: corresponding Ritz estimates') end if end if -c +c c %---------------------------------------------------------% c | Apply the NP0 implicit shifts by QR bulge chasing. | c | Each shift is applied to the entire tridiagonal matrix. | @@ -770,36 +770,36 @@ subroutine ssaup2 c %---------------------------------------------% c cnorm = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call scopy (n, resid, 1, workd(n+1), 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*RESID | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call scopy (n, resid, 1, workd, 1) end if -c +c 100 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(1:N) := B*RESID | c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c - if (bmat .eq. 'G') then +c + if (bmat .eq. 'G') then rnorm = sdot (n, resid, 1, workd, 1) rnorm = sqrt(abs(rnorm)) else if (bmat .eq. 'I') then @@ -809,14 +809,14 @@ subroutine ssaup2 130 continue c if (msglvl .gt. 2) then - call svout (logfil, 1, rnorm, ndigit, + call svout (logfil, 1, [rnorm], ndigit, & '_saup2: B-norm of residual for NEV factorization') call svout (logfil, nev, h(1,2), ndigit, & '_saup2: main diagonal of compressed H matrix') call svout (logfil, nev-1, h(2,1), ndigit, & '_saup2: subdiagonal of compressed H matrix') end if -c +c go to 1000 c c %---------------------------------------------------------------% @@ -824,12 +824,12 @@ subroutine ssaup2 c | E N D O F M A I N I T E R A T I O N L O O P | c | | c %---------------------------------------------------------------% -c +c 1100 continue c mxiter = iter nev = nconv -c +c 1200 continue ido = 99 c @@ -837,9 +837,9 @@ subroutine ssaup2 c | Error exit | c %------------% c - call second (t1) + call arscnd (t1) tsaup2 = t1 - t0 -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/ssaupd.f b/Toolbox/arpack-src/ssaupd.f index 1989ed254..975681527 100644 --- a/Toolbox/arpack-src/ssaupd.f +++ b/Toolbox/arpack-src/ssaupd.f @@ -3,31 +3,31 @@ c c\Name: ssaupd c -c\Description: +c\Description: c -c Reverse communication interface for the Implicitly Restarted Arnoldi -c Iteration. For symmetric problems this reduces to a variant of the Lanczos -c method. This method has been designed to compute approximations to a -c few eigenpairs of a linear operator OP that is real and symmetric -c with respect to a real positive semi-definite symmetric matrix B, +c Reverse communication interface for the Implicitly Restarted Arnoldi +c Iteration. For symmetric problems this reduces to a variant of the Lanczos +c method. This method has been designed to compute approximations to a +c few eigenpairs of a linear operator OP that is real and symmetric +c with respect to a real positive semi-definite symmetric matrix B, c i.e. -c -c B*OP = (OP`)*B. c -c Another way to express this condition is +c B*OP = (OP`)*B. +c +c Another way to express this condition is c c < x,OPy > = < OPx,y > where < z,w > = z`Bw . -c -c In the standard eigenproblem B is the identity matrix. +c +c In the standard eigenproblem B is the identity matrix. c ( A` denotes transpose of A) c c The computed approximate eigenvalues are called Ritz values and c the corresponding approximate eigenvectors are called Ritz vectors. c -c ssaupd is usually called iteratively to solve one of the +c ssaupd is usually called iteratively to solve one of the c following problems: c -c Mode 1: A*x = lambda*x, A symmetric +c Mode 1: A*x = lambda*x, A symmetric c ===> OP = A and B = I. c c Mode 2: A*x = lambda*M*x, A symmetric, M symmetric positive definite @@ -35,10 +35,10 @@ c ===> (If M can be factored see remark 3 below) c c Mode 3: K*x = lambda*M*x, K symmetric, M symmetric positive semi-definite -c ===> OP = (inv[K - sigma*M])*M and B = M. +c ===> OP = (inv[K - sigma*M])*M and B = M. c ===> Shift-and-Invert mode c -c Mode 4: K*x = lambda*KG*x, K symmetric positive semi-definite, +c Mode 4: K*x = lambda*KG*x, K symmetric positive semi-definite, c KG symmetric indefinite c ===> OP = (inv[K - sigma*KG])*K and B = K. c ===> Buckling mode @@ -60,13 +60,13 @@ c approximations. c c\Usage: -c call ssaupd +c call ssaupd c ( IDO, BMAT, N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, c IPNTR, WORKD, WORKL, LWORKL, INFO ) c c\Arguments c IDO Integer. (INPUT/OUTPUT) -c Reverse communication flag. IDO must be zero on the first +c Reverse communication flag. IDO must be zero on the first c call to ssaupd. IDO will be set internally to c indicate the type of operation to be performed. Control is c then given back to the calling routine which has the @@ -95,7 +95,7 @@ c placing the shifts. See remark 6 below. c IDO = 99: done c ------------------------------------------------------------- -c +c c BMAT Character*1. (INPUT) c BMAT specifies the type of the matrix B that defines the c semi-inner product for the operator OP. @@ -111,7 +111,7 @@ c 'LA' - compute the NEV largest (algebraic) eigenvalues. c 'SA' - compute the NEV smallest (algebraic) eigenvalues. c 'LM' - compute the NEV largest (in magnitude) eigenvalues. -c 'SM' - compute the NEV smallest (in magnitude) eigenvalues. +c 'SM' - compute the NEV smallest (in magnitude) eigenvalues. c 'BE' - compute NEV eigenvalues, half from each end of the c spectrum. When NEV is odd, compute one more from the c high end than from the low end. @@ -121,27 +121,27 @@ c Number of eigenvalues of OP to be computed. 0 < NEV < N. c c TOL Real scalar. (INPUT) -c Stopping criterion: the relative accuracy of the Ritz value +c Stopping criterion: the relative accuracy of the Ritz value c is considered acceptable if BOUNDS(I) .LE. TOL*ABS(RITZ(I)). c If TOL .LE. 0. is passed a default is set: c DEFAULT = SLAMCH('EPS') (machine precision as computed c by the LAPACK auxiliary subroutine SLAMCH). c c RESID Real array of length N. (INPUT/OUTPUT) -c On INPUT: +c On INPUT: c If INFO .EQ. 0, a random initial residual vector is used. c If INFO .NE. 0, RESID contains the initial residual vector, c possibly from a previous run. c On OUTPUT: -c RESID contains the final residual vector. +c RESID contains the final residual vector. c c NCV Integer. (INPUT) c Number of columns of the matrix V (less than or equal to N). -c This will indicate how many Lanczos vectors are generated -c at each iteration. After the startup phase in which NEV -c Lanczos vectors are generated, the algorithm generates +c This will indicate how many Lanczos vectors are generated +c at each iteration. After the startup phase in which NEV +c Lanczos vectors are generated, the algorithm generates c NCV-NEV Lanczos vectors at each subsequent update iteration. -c Most of the cost in generating each Lanczos vector is in the +c Most of the cost in generating each Lanczos vector is in the c matrix-vector product OP*x. (See remark 4 below). c c V Real N by NCV array. (OUTPUT) @@ -161,10 +161,10 @@ c the current tridiagonal matrix T are returned in c the part of WORKL array corresponding to RITZ. c See remark 6 below. -c ISHIFT = 1: exact shifts with respect to the reduced -c tridiagonal matrix T. This is equivalent to -c restarting the iteration with a starting vector -c that is a linear combination of Ritz vectors +c ISHIFT = 1: exact shifts with respect to the reduced +c tridiagonal matrix T. This is equivalent to +c restarting the iteration with a starting vector +c that is a linear combination of Ritz vectors c associated with the "wanted" Ritz values. c ------------------------------------------------------------- c @@ -172,8 +172,8 @@ c No longer referenced. See remark 2 below. c c IPARAM(3) = MXITER -c On INPUT: maximum number of Arnoldi update iterations allowed. -c On OUTPUT: actual number of Arnoldi update iterations taken. +c On INPUT: maximum number of Arnoldi update iterations allowed. +c On OUTPUT: actual number of Arnoldi update iterations taken. c c IPARAM(4) = NB: blocksize to be used in the recurrence. c The code currently works only for NB = 1. @@ -183,11 +183,11 @@ c the convergence criterion. c c IPARAM(6) = IUPD -c No longer referenced. Implicit restarting is ALWAYS used. +c No longer referenced. Implicit restarting is ALWAYS used. c c IPARAM(7) = MODE c On INPUT determines what type of eigenproblem is being solved. -c Must be 1,2,3,4,5; See under \Description of ssaupd for the +c Must be 1,2,3,4,5; See under \Description of ssaupd for the c five modes available. c c IPARAM(8) = NP @@ -199,7 +199,7 @@ c IPARAM(9) = NUMOP, IPARAM(10) = NUMOPB, IPARAM(11) = NUMREO, c OUTPUT: NUMOP = total number of OP*x operations, c NUMOPB = total number of B*x operations if BMAT='G', -c NUMREO = total number of steps of re-orthogonalization. +c NUMREO = total number of steps of re-orthogonalization. c c IPNTR Integer array of length 11. (OUTPUT) c Pointer to mark the starting locations in the WORKD and WORKL @@ -207,7 +207,7 @@ c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X in WORKD. c IPNTR(2): pointer to the current result vector Y in WORKD. -c IPNTR(3): pointer to the vector B * X in WORKD when used in +c IPNTR(3): pointer to the vector B * X in WORKD when used in c the shift-and-invert mode. c IPNTR(4): pointer to the next available location in WORKL c that is untouched by the program. @@ -224,14 +224,14 @@ c of the tridiagonal matrix T. Only referenced by c sseupd if RVEC = .TRUE. See Remarks. c ------------------------------------------------------------- -c +c c WORKD Real work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The user should not use WORKD +c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration. Upon termination c WORKD(1:N) contains B*RESID(1:N). If the Ritz vectors are desired c subroutine sseupd uses this output. -c See Data Distribution Note below. +c See Data Distribution Note below. c c WORKL Real work array of length LWORKL. (OUTPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on @@ -247,13 +247,13 @@ c Error flag on output. c = 0: Normal exit. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. IPARAM(5) +c All possible eigenvalues of OP has been found. IPARAM(5) c returns the number of wanted converged Ritz values. c = 2: No longer an informational error. Deprecated starting c with release 2 of ARPACK. -c = 3: No shifts could be applied during a cycle of the -c Implicitly restarted Arnoldi iteration. One possibility -c is to increase the size of NCV relative to NEV. +c = 3: No shifts could be applied during a cycle of the +c Implicitly restarted Arnoldi iteration. One possibility +c is to increase the size of NCV relative to NEV. c See remark 4 below. c = -1: N must be positive. c = -2: NEV must be positive. @@ -267,9 +267,9 @@ c Informatinal error from LAPACK routine ssteqr. c = -9: Starting vector is zero. c = -10: IPARAM(7) must be 1,2,3,4,5. -c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatable. +c = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatible. c = -12: IPARAM(1) must be equal to 0 or 1. -c = -13: NEV and WHICH = 'BE' are incompatable. +c = -13: NEV and WHICH = 'BE' are incompatible. c = -9999: Could not build an Arnoldi factorization. c IPARAM(5) returns the size of the current Arnoldi c factorization. The user is advised to check that @@ -277,12 +277,12 @@ c c c\Remarks -c 1. The converged Ritz values are always returned in ascending +c 1. The converged Ritz values are always returned in ascending c algebraic order. The computed Ritz values are approximate c eigenvalues of OP. The selection of WHICH should be made -c with this in mind when Mode = 3,4,5. After convergence, -c approximate eigenvalues of the original problem may be obtained -c with the ARPACK subroutine sseupd. +c with this in mind when Mode = 3,4,5. After convergence, +c approximate eigenvalues of the original problem may be obtained +c with the ARPACK subroutine sseupd. c c 2. If the Ritz vectors corresponding to the converged Ritz values c are needed, the user must call sseupd immediately following completion @@ -290,38 +290,38 @@ c c 3. If M can be factored into a Cholesky factorization M = LL` c then Mode = 2 should not be selected. Instead one should use -c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular +c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular c linear systems should be solved with L and L` rather c than computing inverses. After convergence, an approximate c eigenvector z of the original problem is recovered by solving c L`z = x where x is a Ritz vector of OP. c c 4. At present there is no a-priori analysis to guide the selection -c of NCV relative to NEV. The only formal requrement is that NCV > NEV. +c of NCV relative to NEV. The only formal requirement is that NCV > NEV. c However, it is recommended that NCV .ge. 2*NEV. If many problems of c the same type are to be solved, one should experiment with increasing -c NCV while keeping NEV fixed for a given test problem. This will +c NCV while keeping NEV fixed for a given test problem. This will c usually decrease the required number of OP*x operations but it c also increases the work and storage required to maintain the orthogonal c basis vectors. The optimal "cross-over" with respect to CPU time c is problem dependent and must be determined empirically. c -c 5. If IPARAM(7) = 2 then in the Reverse commuication interface the user +c 5. If IPARAM(7) = 2 then in the Reverse communication interface the user c must do the following. When IDO = 1, Y = OP * X is to be computed. c When IPARAM(7) = 2 OP = inv(B)*A. After computing A*X the user c must overwrite X with A*X. Y is then the solution to the linear set c of equations B*Y = A*X. c -c 6. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the -c NP = IPARAM(8) shifts in locations: -c 1 WORKL(IPNTR(11)) -c 2 WORKL(IPNTR(11)+1) -c . -c . -c . -c NP WORKL(IPNTR(11)+NP-1). +c 6. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the +c NP = IPARAM(8) shifts in locations: +c 1 WORKL(IPNTR(11)) +c 2 WORKL(IPNTR(11)+1) +c . +c . +c . +c NP WORKL(IPNTR(11)+NP-1). c -c The eigenvalues of the current tridiagonal matrix are located in +c The eigenvalues of the current tridiagonal matrix are located in c WORKL(IPNTR(6)) through WORKL(IPNTR(6)+NCV-1). They are in the c order defined by WHICH. The associated Ritz estimates are located in c WORKL(IPNTR(8)), WORKL(IPNTR(8)+1), ... , WORKL(IPNTR(8)+NCV-1). @@ -347,7 +347,7 @@ c REAL RESID(N), V(LDV,NCV), WORKD(N,3), WORKL(LWORKL) c SHARED RESID(BLOCK), V(BLOCK,:), WORKD(BLOCK,:) c REPLICATED WORKL(LWORKL) -c +c c c\BeginLib c @@ -355,7 +355,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall, @@ -365,8 +365,8 @@ c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to c Implement the Spectral Transformation", Math. Comp., 48 (1987), c pp 663-673. -c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos -c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", +c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos +c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", c SIAM J. Matr. Anal. Apps., January (1993). c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines c for Updating the QR decomposition", ACM TOMS, December 1990, @@ -380,7 +380,7 @@ c sstats ARPACK routine that initialize timing and other statistics c variables. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c svout ARPACK utility routine that prints vectors. c slamch LAPACK routine that determines machine constants. c @@ -389,14 +389,14 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: -c 12/15/93: Version ' 2.4' +c 12/15/93: Version ' 2.4' c -c\SCCS Information: @(#) -c FILE: saupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 +c\SCCS Information: @(#) +c FILE: saupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 c c\Remarks c 1. None @@ -406,15 +406,15 @@ c----------------------------------------------------------------------- c subroutine ssaupd - & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, + & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, & ipntr, workd, workl, lworkl, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -422,7 +422,7 @@ subroutine ssaupd c character bmat*1, which*2 integer ido, info, ldv, lworkl, n, ncv, nev - Real + Real & tol c c %-----------------% @@ -430,14 +430,14 @@ subroutine ssaupd c %-----------------% c integer iparam(11), ipntr(11) - Real + Real & resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) c c %------------% c | Parameters | c %------------% c - Real + Real & one, zero parameter (one = 1.0E+0 , zero = 0.0E+0 ) c @@ -445,7 +445,7 @@ subroutine ssaupd c | Local Scalars | c %---------------% c - integer bounds, ierr, ih, iq, ishift, iupd, iw, + integer bounds, ierr, ih, iq, ishift, iupd, iw, & ldh, ldq, msglvl, mxiter, mode, nb, & nev0, next, np, ritz, j save bounds, ierr, ih, iq, ishift, iupd, iw, @@ -456,20 +456,20 @@ subroutine ssaupd c | External Subroutines | c %----------------------% c - external ssaup2, svout, ivout, second, sstats + external ssaup2, svout, ivout, arscnd, sstats c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & slamch external slamch c c %-----------------------% c | Executable Statements | c %-----------------------% -c +c if (ido .eq. 0) then c c %-------------------------------% @@ -478,7 +478,7 @@ subroutine ssaupd c %-------------------------------% c call sstats - call second (t0) + call arscnd (t0) msglvl = msaupd c ierr = 0 @@ -512,7 +512,7 @@ subroutine ssaupd c %----------------------------------------------% c np = ncv - nev -c +c if (mxiter .le. 0) ierr = -4 if (which .ne. 'LM' .and. & which .ne. 'SM' .and. @@ -531,7 +531,7 @@ subroutine ssaupd else if (nev .eq. 1 .and. which .eq. 'BE') then ierr = -13 end if -c +c c %------------% c | Error Exit | c %------------% @@ -541,7 +541,7 @@ subroutine ssaupd ido = 99 go to 9000 end if -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -557,8 +557,8 @@ subroutine ssaupd c %----------------------------------------------% c np = ncv - nev - nev0 = nev -c + nev0 = nev +c c %-----------------------------% c | Zero out internal workspace | c %-----------------------------% @@ -566,7 +566,7 @@ subroutine ssaupd do 10 j = 1, ncv**2 + 8*ncv workl(j) = zero 10 continue -c +c c %-------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -599,7 +599,7 @@ subroutine ssaupd c | Carry out the Implicitly restarted Lanczos Iteration. | c %-------------------------------------------------------% c - call ssaup2 + call ssaup2 & ( ido, bmat, n, which, nev0, np, tol, resid, mode, iupd, & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritz), & workl(bounds), workl(iq), ldq, workl(iw), ipntr, workd, @@ -612,7 +612,7 @@ subroutine ssaupd c if (ido .eq. 3) iparam(8) = np if (ido .ne. 99) go to 9000 -c +c iparam(3) = mxiter iparam(5) = np iparam(9) = nopx @@ -628,19 +628,19 @@ subroutine ssaupd if (info .eq. 2) info = 3 c if (msglvl .gt. 0) then - call ivout (logfil, 1, mxiter, ndigit, + call ivout (logfil, 1, [mxiter], ndigit, & '_saupd: number of update iterations taken') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_saupd: number of "converged" Ritz values') - call svout (logfil, np, workl(Ritz), ndigit, + call svout (logfil, np, workl(Ritz), ndigit, & '_saupd: final Ritz values') - call svout (logfil, np, workl(Bounds), ndigit, + call svout (logfil, np, workl(Bounds), ndigit, & '_saupd: corresponding error bounds') - end if + end if c - call second (t1) + call arscnd (t1) tsaupd = t1 - t0 -c +c if (msglvl .gt. 0) then c c %--------------------------------------------------------% @@ -678,9 +678,9 @@ subroutine ssaupd & 5x, 'Total time in applying the shifts = ', f12.6,/ & 5x, 'Total time in convergence testing = ', f12.6) end if -c +c 9000 continue -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/ssconv.f b/Toolbox/arpack-src/ssconv.f index 4987c7e25..11e4cab26 100644 --- a/Toolbox/arpack-src/ssconv.f +++ b/Toolbox/arpack-src/ssconv.f @@ -3,7 +3,7 @@ c c\Name: ssconv c -c\Description: +c\Description: c Convergence testing for the symmetric Arnoldi eigenvalue routine. c c\Usage: @@ -34,23 +34,23 @@ c\BeginLib c c\Routines called: -c second ARPACK utility routine for timing. -c slamch LAPACK routine that determines machine constants. +c arscnd ARPACK utility routine for timing. +c slamch LAPACK routine that determines machine constants. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sconv.F SID: 2.4 DATE OF SID: 4/19/96 RELEASE: 2 c c\Remarks c 1. Starting with version 2.4, this routine no longer uses the -c Parlett strategy using the gap conditions. +c Parlett strategy using the gap conditions. c c\EndLib c @@ -62,8 +62,8 @@ subroutine ssconv (n, ritz, bounds, tol, nconv) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -106,9 +106,9 @@ subroutine ssconv (n, ritz, bounds, tol, nconv) c | Executable Statements | c %-----------------------% c - call second (t0) + call arscnd (t0) c - eps23 = slamch('Epsilon-Machine') + eps23 = slamch('Epsilon-Machine') eps23 = eps23**(2.0E+0 / 3.0E+0) c nconv = 0 @@ -125,10 +125,10 @@ subroutine ssconv (n, ritz, bounds, tol, nconv) end if c 10 continue -c - call second (t1) +c + call arscnd (t1) tsconv = tsconv + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/sseigt.f b/Toolbox/arpack-src/sseigt.f index 0b4a586cd..3ac336690 100644 --- a/Toolbox/arpack-src/sseigt.f +++ b/Toolbox/arpack-src/sseigt.f @@ -3,7 +3,7 @@ c c\Name: sseigt c -c\Description: +c\Description: c Compute the eigenvalues of the current symmetric tridiagonal matrix c and the corresponding error bounds given the current residual norm. c @@ -20,16 +20,16 @@ c Size of the symmetric tridiagonal matrix H. c c H Real N by 2 array. (INPUT) -c H contains the symmetric tridiagonal matrix with the -c subdiagonal in the first column starting at H(2,1) and the +c H contains the symmetric tridiagonal matrix with the +c subdiagonal in the first column starting at H(2,1) and the c main diagonal in second column. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c EIG Real array of length N. (OUTPUT) -c On output, EIG contains the N eigenvalues of H possibly +c On output, EIG contains the N eigenvalues of H possibly c unsorted. The BOUNDS arrays are returned in the c same sorted order as EIG. c @@ -59,22 +59,22 @@ c sstqrb ARPACK routine that computes the eigenvalues and the c last components of the eigenvectors of a symmetric c and tridiagonal matrix. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c svout ARPACK utility routine that prints vectors. c scopy Level 1 BLAS that copies one vector to another. c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.4' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: seigt.F SID: 2.4 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks @@ -84,15 +84,15 @@ c c----------------------------------------------------------------------- c - subroutine sseigt + subroutine sseigt & ( rnorm, n, h, ldh, eig, bounds, workl, ierr ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -127,7 +127,7 @@ subroutine sseigt c | External Subroutines | c %----------------------% c - external scopy, sstqrb, svout, second + external scopy, sstqrb, svout, arscnd c c %-----------------------% c | Executable Statements | @@ -136,9 +136,9 @@ subroutine sseigt c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | -c %-------------------------------% +c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mseigt c if (msglvl .gt. 0) then @@ -167,8 +167,8 @@ subroutine sseigt do 30 k = 1, n bounds(k) = rnorm*abs(bounds(k)) 30 continue -c - call second (t1) +c + call arscnd (t1) tseigt = tseigt + (t1 - t0) c 9000 continue diff --git a/Toolbox/arpack-src/ssesrt.f b/Toolbox/arpack-src/ssesrt.f index 36e8787e1..afc71b088 100644 --- a/Toolbox/arpack-src/ssesrt.f +++ b/Toolbox/arpack-src/ssesrt.f @@ -4,7 +4,7 @@ c\Name: ssesrt c c\Description: -c Sort the array X in the order specified by WHICH and optionally +c Sort the array X in the order specified by WHICH and optionally c apply the permutation to the columns of the matrix A. c c\Usage: @@ -32,7 +32,7 @@ c Number of rows of the matrix A. c c A Real array of length NA by N. (INPUT/OUTPUT) -c +c c LDA Integer. (INPUT) c Leading dimension of A. c @@ -47,18 +47,18 @@ c c\Authors c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c 12/15/93: Version ' 2.1'. -c Adapted from the sort routine in LANSO and +c Adapted from the sort routine in LANSO and c the ARPACK code ssortr c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sesrt.F SID: 2.3 DATE OF SID: 4/19/96 RELEASE: 2 c c\EndLib @@ -101,7 +101,7 @@ subroutine ssesrt (which, apply, n, x, na, a, lda) c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'SA') then c c X is sorted into decreasing order of algebraic. @@ -165,7 +165,7 @@ subroutine ssesrt (which, apply, n, x, na, a, lda) 80 continue c if (j.lt.0) go to 90 -c +c if (x(j).gt.x(j+igap)) then temp = x(j) x(j) = x(j+igap) @@ -179,7 +179,7 @@ subroutine ssesrt (which, apply, n, x, na, a, lda) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'LM') then c c X is sorted into increasing order of magnitude. diff --git a/Toolbox/arpack-src/sseupd.f b/Toolbox/arpack-src/sseupd.f index b8b10edbb..03ba7ac50 100644 --- a/Toolbox/arpack-src/sseupd.f +++ b/Toolbox/arpack-src/sseupd.f @@ -2,7 +2,7 @@ c c\Name: sseupd c -c\Description: +c\Description: c c This subroutine returns the converged approximations to eigenvalues c of A*z = lambda*B*z and (optionally): @@ -15,22 +15,22 @@ c (3) Both. c c There is negligible additional cost to obtain eigenvectors. An orthonormal -c (Lanczos) basis is always computed. There is an additional storage cost -c of n*nev if both are requested (in this case a separate array Z must be +c (Lanczos) basis is always computed. There is an additional storage cost +c of n*nev if both are requested (in this case a separate array Z must be c supplied). c c These quantities are obtained from the Lanczos factorization computed c by SSAUPD for the linear operator OP prescribed by the MODE selection c (see IPARAM(7) in SSAUPD documentation.) SSAUPD must be called before -c this routine is called. These approximate eigenvalues and vectors are -c commonly called Ritz values and Ritz vectors respectively. They are -c referred to as such in the comments that follow. The computed orthonormal -c basis for the invariant subspace corresponding to these Ritz values is +c this routine is called. These approximate eigenvalues and vectors are +c commonly called Ritz values and Ritz vectors respectively. They are +c referred to as such in the comments that follow. The computed orthonormal +c basis for the invariant subspace corresponding to these Ritz values is c referred to as a Lanczos basis. c -c See documentation in the header of the subroutine SSAUPD for a definition -c of OP as well as other terms and the relation of computed Ritz values -c and vectors of OP with respect to the given problem A*z = lambda*B*z. +c See documentation in the header of the subroutine SSAUPD for a definition +c of OP as well as other terms and the relation of computed Ritz values +c and vectors of OP with respect to the given problem A*z = lambda*B*z. c c The approximate eigenvalues of the original problem are returned in c ascending algebraic order. The user may elect to call this routine @@ -39,19 +39,19 @@ c with a single call. c c\Usage: -c call sseupd +c call sseupd c ( RVEC, HOWMNY, SELECT, D, Z, LDZ, SIGMA, BMAT, N, WHICH, NEV, TOL, c RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, WORKL, LWORKL, INFO ) c -c RVEC LOGICAL (INPUT) -c Specifies whether Ritz vectors corresponding to the Ritz value +c RVEC LOGICAL (INPUT) +c Specifies whether Ritz vectors corresponding to the Ritz value c approximations to the eigenproblem A*z = lambda*B*z are computed. c c RVEC = .FALSE. Compute Ritz values only. c c RVEC = .TRUE. Compute Ritz vectors. c -c HOWMNY Character*1 (INPUT) +c HOWMNY Character*1 (INPUT) c Specifies how many Ritz vectors are wanted and the form of Z c the matrix of Ritz vectors. See remark 1 below. c = 'A': compute NEV Ritz vectors; @@ -61,7 +61,7 @@ c SELECT Logical array of dimension NCV. (INPUT/WORKSPACE) c If HOWMNY = 'S', SELECT specifies the Ritz vectors to be c computed. To select the Ritz vector corresponding to a -c Ritz value D(j), SELECT(j) must be set to .TRUE.. +c Ritz value D(j), SELECT(j) must be set to .TRUE.. c If HOWMNY = 'A' , SELECT is used as a workspace for c reordering the Ritz values. c @@ -70,8 +70,8 @@ c eigenvalues of A*z = lambda*B*z. The values are returned c in ascending order. If IPARAM(7) = 3,4,5 then D represents c the Ritz values of OP computed by ssaupd transformed to -c those of the original eigensystem A*z = lambda*B*z. If -c IPARAM(7) = 1,2 then the Ritz values of OP are the same +c those of the original eigensystem A*z = lambda*B*z. If +c IPARAM(7) = 1,2 then the Ritz values of OP are the same c as the those of A*z = lambda*B*z. c c Z Real N by NEV array if HOWMNY = 'A'. (OUTPUT) @@ -79,7 +79,7 @@ c eigensystem A*z = lambda*B*z corresponding to the Ritz c value approximations. c If RVEC = .FALSE. then Z is not referenced. -c NOTE: The array Z may be set equal to first NEV columns of the +c NOTE: The array Z may be set equal to first NEV columns of the c Arnoldi/Lanczos basis array V computed by SSAUPD. c c LDZ Integer. (INPUT) @@ -144,7 +144,7 @@ c = -17: SSEUPD got a different count of the number of converged c Ritz values than SSAUPD got. This indicates the user c probably made an error in passing data from SSAUPD to -c SSEUPD or that the data was modified before entering +c SSEUPD or that the data was modified before entering c SSEUPD. c c\BeginLib @@ -153,7 +153,7 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall, @@ -163,19 +163,19 @@ c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to c Implement the Spectral Transformation", Math. Comp., 48 (1987), c pp 663-673. -c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos -c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", +c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos +c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems", c SIAM J. Matr. Anal. Apps., January (1993). c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines c for Updating the QR decomposition", ACM TOMS, December 1990, c Volume 16 Number 4, pp 369-377. c c\Remarks -c 1. The converged Ritz values are always returned in increasing +c 1. The converged Ritz values are always returned in increasing c (algebraic) order. c c 2. Currently only HOWMNY = 'A' is implemented. It is included at this -c stage for the user who wants to incorporate it. +c stage for the user who wants to incorporate it. c c\Routines called: c ssesrt ARPACK routine that sorts an array X, and applies the @@ -201,15 +201,15 @@ c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Chao Yang Houston, Texas -c Dept. of Computational & +c Dept. of Computational & c Applied Mathematics -c Rice University -c Houston, Texas -c +c Rice University +c Houston, Texas +c c\Revision history: c 12/15/93: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: seupd.F SID: 2.11 DATE OF SID: 04/10/01 RELEASE: 2 c c\EndLib @@ -226,8 +226,8 @@ subroutine sseupd(rvec , howmny, select, d , c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -236,7 +236,7 @@ subroutine sseupd(rvec , howmny, select, d , character bmat, howmny, which*2 logical rvec integer info, ldz, ldv, lworkl, n, ncv, nev - Real + Real & sigma, tol c c %-----------------% @@ -245,7 +245,7 @@ subroutine sseupd(rvec , howmny, select, d , c integer iparam(7), ipntr(11) logical select(ncv) - Real + Real & d(nev) , resid(n) , v(ldv,ncv), & z(ldz, nev), workd(2*n), workl(lworkl) c @@ -253,7 +253,7 @@ subroutine sseupd(rvec , howmny, select, d , c | Parameters | c %------------% c - Real + Real & one, zero parameter (one = 1.0E+0 , zero = 0.0E+0 ) c @@ -267,7 +267,7 @@ subroutine sseupd(rvec , howmny, select, d , & ldq , mode , msglvl, nconv , next , & ritz , irz , ibd , np , ishift, & leftptr, rghtptr, numcnv, jj - Real + Real & bnorm2 , rnorm, temp, temp1, eps23 logical reord c @@ -275,14 +275,14 @@ subroutine sseupd(rvec , howmny, select, d , c | External Subroutines | c %----------------------% c - external scopy , sger , sgeqr2, slacpy, sorm2r, sscal, + external scopy , sger , sgeqr2, slacpy, sorm2r, sscal, & ssesrt, ssteqr, sswap , svout , ivout , ssortr c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & snrm2, slamch external snrm2, slamch c @@ -295,7 +295,7 @@ subroutine sseupd(rvec , howmny, select, d , c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -312,7 +312,7 @@ subroutine sseupd(rvec , howmny, select, d , if (nconv .eq. 0) go to 9000 ierr = 0 c - if (nconv .le. 0) ierr = -14 + if (nconv .le. 0) ierr = -14 if (n .le. 0) ierr = -1 if (nev .le. 0) ierr = -2 if (ncv .le. nev .or. ncv .gt. n) ierr = -3 @@ -324,12 +324,12 @@ subroutine sseupd(rvec , howmny, select, d , if (bmat .ne. 'I' .and. bmat .ne. 'G') ierr = -6 if ( (howmny .ne. 'A' .and. & howmny .ne. 'P' .and. - & howmny .ne. 'S') .and. rvec ) + & howmny .ne. 'S') .and. rvec ) & ierr = -15 if (rvec .and. howmny .eq. 'S') ierr = -16 c if (rvec .and. lworkl .lt. ncv**2+8*ncv) ierr = -7 -c +c if (mode .eq. 1 .or. mode .eq. 2) then type = 'REGULR' else if (mode .eq. 3 ) then @@ -338,7 +338,7 @@ subroutine sseupd(rvec , howmny, select, d , type = 'BUCKLE' else if (mode .eq. 5 ) then type = 'CAYLEY' - else + else ierr = -10 end if if (mode .eq. 1 .and. bmat .eq. 'G') ierr = -11 @@ -352,7 +352,7 @@ subroutine sseupd(rvec , howmny, select, d , info = ierr go to 9000 end if -c +c c %-------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -427,7 +427,7 @@ subroutine sseupd(rvec , howmny, select, d , c | Set machine dependent constant. | c %---------------------------------% c - eps23 = slamch('Epsilon-Machine') + eps23 = slamch('Epsilon-Machine') eps23 = eps23**(2.0E+0 / 3.0E+0 ) c c %---------------------------------------% @@ -501,7 +501,7 @@ subroutine sseupd(rvec , howmny, select, d , & workl(ibd+jj-1) .le. tol*temp1) then select(jj) = .true. numcnv = numcnv + 1 - if (jj .gt. nev) reord = .true. + if (jj .gt. nconv) reord = .true. endif 11 continue c @@ -513,9 +513,9 @@ subroutine sseupd(rvec , howmny, select, d , c %-----------------------------------------------------------% c if (msglvl .gt. 2) then - call ivout(logfil, 1, numcnv, ndigit, + call ivout(logfil, 1, [numcnv], ndigit, & '_seupd: Number of specified eigenvalues') - call ivout(logfil, 1, nconv, ndigit, + call ivout(logfil, 1, [nconv], ndigit, & '_seupd: Number of "converged" eigenvalues') end if c @@ -609,9 +609,9 @@ subroutine sseupd(rvec , howmny, select, d , c if (leftptr .lt. rghtptr) go to 20 c - 30 end if + end if c - if (msglvl .gt. 2) then + 30 if (msglvl .gt. 2) then call svout (logfil, ncv, workl(ihd), ndigit, & '_seupd: The eigenvalues of H--reordered') end if @@ -652,8 +652,8 @@ subroutine sseupd(rvec , howmny, select, d , call scopy(ncv, workl(bounds), 1, workl(ihb), 1) end if c - else -c + else +c c %-------------------------------------------------------------% c | * Make a copy of all the Ritz values. | c | * Transform the Ritz values back to the original system. | @@ -670,13 +670,13 @@ subroutine sseupd(rvec , howmny, select, d , c %-------------------------------------------------------------% c call scopy (ncv, workl(ihd), 1, workl(iw), 1) - if (type .eq. 'SHIFTI') then + if (type .eq. 'SHIFTI') then do 40 k=1, ncv workl(ihd+k-1) = one / workl(ihd+k-1) + sigma 40 continue else if (type .eq. 'BUCKLE') then do 50 k=1, ncv - workl(ihd+k-1) = sigma * workl(ihd+k-1) / + workl(ihd+k-1) = sigma * workl(ihd+k-1) / & (workl(ihd+k-1) - one) 50 continue else if (type .eq. 'CAYLEY') then @@ -685,7 +685,7 @@ subroutine sseupd(rvec , howmny, select, d , & (workl(ihd+k-1) - one) 60 continue end if -c +c c %-------------------------------------------------------------% c | * Store the wanted NCONV lambda values into D. | c | * Sort the NCONV wanted lambda in WORKL(IHD:IHD+NCONV-1) | @@ -711,8 +711,8 @@ subroutine sseupd(rvec , howmny, select, d , call ssortr('LA', .true., nconv, d, workl(ihb)) end if c - end if -c + end if +c c %------------------------------------------------% c | Compute the Ritz vectors. Transform the wanted | c | eigenvectors of the symmetric tridiagonal H by | @@ -720,25 +720,25 @@ subroutine sseupd(rvec , howmny, select, d , c %------------------------------------------------% c if (rvec .and. howmny .eq. 'A') then -c +c c %----------------------------------------------------------% c | Compute the QR factorization of the matrix representing | c | the wanted invariant subspace located in the first NCONV | c | columns of workl(iq,ldq). | c %----------------------------------------------------------% -c +c call sgeqr2(ncv, nconv , workl(iq) , & ldq, workl(iw+ncv), workl(ihb), & ierr) c c %--------------------------------------------------------% -c | * Postmultiply V by Q. | +c | * Postmultiply V by Q. | c | * Copy the first NCONV columns of VQ into Z. | c | The N by NCONV matrix Z is now a matrix representation | c | of the approximate invariant subspace associated with | c | the Ritz values in workl(ihd). | c %--------------------------------------------------------% -c +c call sorm2r('Right', 'Notranspose', n , & ncv , nconv , workl(iq), & ldq , workl(iw+ncv), v , @@ -752,7 +752,7 @@ subroutine sseupd(rvec , howmny, select, d , c %-----------------------------------------------------% c do 65 j = 1, ncv-1 - workl(ihb+j-1) = zero + workl(ihb+j-1) = zero 65 continue workl(ihb+ncv-1) = one call sorm2r('Left', 'Transpose' , ncv , @@ -760,6 +760,16 @@ subroutine sseupd(rvec , howmny, select, d , & ldq , workl(iw+ncv), workl(ihb), & ncv , temp , ierr) c +c %-----------------------------------------------------% +c | Make a copy of the last row into | +c | workl(iw+ncv:iw+2*ncv), as it is needed again in | +c | the Ritz vector purification step below | +c %-----------------------------------------------------% +c + do 67 j = 1, nconv + workl(iw+ncv+j-1) = workl(ihb+j-1) + 67 continue + else if (rvec .and. howmny .eq. 'S') then c c Not yet implemented. See remark 2 above. @@ -784,10 +794,10 @@ subroutine sseupd(rvec , howmny, select, d , c %-------------------------------------------------% c call sscal (ncv, bnorm2, workl(ihb), 1) - if (type .eq. 'SHIFTI') then + if (type .eq. 'SHIFTI') then c do 80 k=1, ncv - workl(ihb+k-1) = abs( workl(ihb+k-1) ) + workl(ihb+k-1) = abs( workl(ihb+k-1) ) & / workl(iw+k-1)**2 80 continue c @@ -812,15 +822,15 @@ subroutine sseupd(rvec , howmny, select, d , if (type .ne. 'REGULR' .and. msglvl .gt. 1) then call svout(logfil, nconv, d, ndigit, & '_seupd: Untransformed converged Ritz values') - call svout(logfil, nconv, workl(ihb), ndigit, + call svout(logfil, nconv, workl(ihb), ndigit, & '_seupd: Ritz estimates of the untransformed Ritz values') else if (msglvl .gt. 1) then call svout(logfil, nconv, d, ndigit, & '_seupd: Converged Ritz values') - call svout(logfil, nconv, workl(ihb), ndigit, + call svout(logfil, nconv, workl(ihb), ndigit, & '_seupd: Associated Ritz estimates') end if -c +c c %-------------------------------------------------% c | Ritz vector purification step. Formally perform | c | one of inverse subspace iteration. Only used | @@ -830,18 +840,18 @@ subroutine sseupd(rvec , howmny, select, d , if (rvec .and. (type .eq. 'SHIFTI' .or. type .eq. 'CAYLEY')) then c do 110 k=0, nconv-1 - workl(iw+k) = workl(iq+k*ldq+ncv-1) + workl(iw+k) = workl(iw+ncv+k) & / workl(iw+k) 110 continue c else if (rvec .and. type .eq. 'BUCKLE') then c do 120 k=0, nconv-1 - workl(iw+k) = workl(iq+k*ldq+ncv-1) + workl(iw+k) = workl(iw+ncv+k) & / (workl(iw+k)-one) 120 continue c - end if + end if c if (type .ne. 'REGULR') & call sger (n, nconv, one, resid, 1, workl(iw), 1, z, ldz) diff --git a/Toolbox/arpack-src/ssgets.f b/Toolbox/arpack-src/ssgets.f index a0e3182d1..f40ca76a8 100644 --- a/Toolbox/arpack-src/ssgets.f +++ b/Toolbox/arpack-src/ssgets.f @@ -3,13 +3,13 @@ c c\Name: ssgets c -c\Description: +c\Description: c Given the eigenvalues of the symmetric tridiagonal matrix H, -c computes the NP shifts AMU that are zeros of the polynomial of -c degree NP which filters out components of the unwanted eigenvectors +c computes the NP shifts AMU that are zeros of the polynomial of +c degree NP which filters out components of the unwanted eigenvectors c corresponding to the AMU's based on some given criteria. c -c NOTE: This is called even in the case of user specified shifts in +c NOTE: This is called even in the case of user specified shifts in c order to sort the eigenvalues, and error bounds of H for later use. c c\Usage: @@ -39,8 +39,8 @@ c c RITZ Real array of length KEV+NP. (INPUT/OUTPUT) c On INPUT, RITZ contains the eigenvalues of H. -c On OUTPUT, RITZ are sorted so that the unwanted eigenvalues -c are in the first NP locations and the wanted part is in +c On OUTPUT, RITZ are sorted so that the unwanted eigenvalues +c are in the first NP locations and the wanted part is in c the last KEV locations. When exact shifts are selected, the c unwanted part corresponds to the shifts to be applied. c @@ -49,7 +49,7 @@ c c SHIFTS Real array of length NP. (INPUT/OUTPUT) c On INPUT: contains the user specified shifts if ISHIFT = 0. -c On OUTPUT: contains the shifts sorted into decreasing order +c On OUTPUT: contains the shifts sorted into decreasing order c of magnitude with respect to the Ritz estimates contained in c BOUNDS. If ISHIFT = 0, SHIFTS is not modified on exit. c @@ -65,7 +65,7 @@ c\Routines called: c ssortr ARPACK utility sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c svout ARPACK utility routine that prints vectors. c scopy Level 1 BLAS that copies one vector to another. c sswap Level 1 BLAS that swaps the contents of two vectors. @@ -75,13 +75,13 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/93: Version ' 2.1' c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sgets.F SID: 2.4 DATE OF SID: 4/19/96 RELEASE: 2 c c\Remarks @@ -96,8 +96,8 @@ subroutine ssgets ( ishift, which, kev, np, ritz, bounds, shifts ) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -131,7 +131,7 @@ subroutine ssgets ( ishift, which, kev, np, ritz, bounds, shifts ) c | External Subroutines | c %----------------------% c - external sswap, scopy, ssortr, second + external sswap, scopy, ssortr, arscnd c c %---------------------% c | Intrinsic Functions | @@ -142,15 +142,15 @@ subroutine ssgets ( ishift, which, kev, np, ritz, bounds, shifts ) c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = msgets -c +c if (which .eq. 'BE') then c c %-----------------------------------------------------% @@ -163,11 +163,11 @@ subroutine ssgets ( ishift, which, kev, np, ritz, bounds, shifts ) c %-----------------------------------------------------% c call ssortr ('LA', .true., kev+np, ritz, bounds) - kevd2 = kev / 2 + kevd2 = kev / 2 if ( kev .gt. 1 ) then - call sswap ( min(kevd2,np), ritz, 1, + call sswap ( min(kevd2,np), ritz, 1, & ritz( max(kevd2,np)+1 ), 1) - call sswap ( min(kevd2,np), bounds, 1, + call sswap ( min(kevd2,np), bounds, 1, & bounds( max(kevd2,np)+1 ), 1) end if c @@ -185,7 +185,7 @@ subroutine ssgets ( ishift, which, kev, np, ritz, bounds, shifts ) end if c if (ishift .eq. 1 .and. np .gt. 0) then -c +c c %-------------------------------------------------------% c | Sort the unwanted Ritz values used as shifts so that | c | the ones with largest Ritz estimates are first. | @@ -193,23 +193,23 @@ subroutine ssgets ( ishift, which, kev, np, ritz, bounds, shifts ) c | forward instability of the iteration when the shifts | c | are applied in subroutine ssapps. | c %-------------------------------------------------------% -c +c call ssortr ('SM', .true., np, bounds, ritz) call scopy (np, ritz, 1, shifts, 1) end if -c - call second (t1) +c + call arscnd (t1) tsgets = tsgets + (t1 - t0) c if (msglvl .gt. 0) then - call ivout (logfil, 1, kev, ndigit, '_sgets: KEV is') - call ivout (logfil, 1, np, ndigit, '_sgets: NP is') + call ivout (logfil, 1, [kev], ndigit, '_sgets: KEV is') + call ivout (logfil, 1, [np], ndigit, '_sgets: NP is') call svout (logfil, kev+np, ritz, ndigit, & '_sgets: Eigenvalues of current H matrix') - call svout (logfil, kev+np, bounds, ndigit, + call svout (logfil, kev+np, bounds, ndigit, & '_sgets: Associated Ritz estimates') end if -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/ssortc.f b/Toolbox/arpack-src/ssortc.f index dba628ff9..e322039cd 100644 --- a/Toolbox/arpack-src/ssortc.f +++ b/Toolbox/arpack-src/ssortc.f @@ -4,7 +4,7 @@ c\Name: ssortc c c\Description: -c Sorts the complex array in XREAL and XIMAG into the order +c Sorts the complex array in XREAL and XIMAG into the order c specified by WHICH and optionally applies the permutation to the c real array Y. It is assumed that if an element of XIMAG is c nonzero, then its negative is also an element. In other words, @@ -49,14 +49,14 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c xx/xx/92: Version ' 2.1' c Adapted from the sort routine in LANSO. c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sortc.F SID: 2.3 DATE OF SID: 4/20/96 RELEASE: 2 c c\EndLib @@ -77,7 +77,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) c | Array Arguments | c %-----------------% c - Real + Real & xreal(0:n-1), ximag(0:n-1), y(0:n-1) c c %---------------% @@ -85,14 +85,14 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) c %---------------% c integer i, igap, j - Real + Real & temp, temp1, temp2 c c %--------------------% c | External Functions | c %--------------------% c - Real + Real & slapy2 external slapy2 c @@ -101,7 +101,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'LM') then c c %------------------------------------------------------% @@ -169,7 +169,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -183,7 +183,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) 60 continue igap = igap / 2 go to 40 -c +c else if (which .eq. 'LR') then c c %------------------------------------------------% @@ -207,7 +207,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -221,7 +221,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'SR') then c c %------------------------------------------------% @@ -244,7 +244,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -258,7 +258,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) 120 continue igap = igap / 2 go to 100 -c +c else if (which .eq. 'LI') then c c %------------------------------------------------% @@ -281,7 +281,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -295,7 +295,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) 150 continue igap = igap / 2 go to 130 -c +c else if (which .eq. 'SI') then c c %------------------------------------------------% @@ -318,7 +318,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) temp = ximag(j) ximag(j) = ximag(j+igap) ximag(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -333,7 +333,7 @@ subroutine ssortc (which, apply, n, xreal, ximag, y) igap = igap / 2 go to 160 end if -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/ssortr.f b/Toolbox/arpack-src/ssortr.f index 267b1251c..25d324b65 100644 --- a/Toolbox/arpack-src/ssortr.f +++ b/Toolbox/arpack-src/ssortr.f @@ -4,7 +4,7 @@ c\Name: ssortr c c\Description: -c Sort the array X1 in the order specified by WHICH and optionally +c Sort the array X1 in the order specified by WHICH and optionally c applies the permutation to the array X2. c c\Usage: @@ -39,17 +39,17 @@ c c\Author c Danny Sorensen Phuong Vu -c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas +c Richard Lehoucq CRPC / Rice University +c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c c\Revision history: c 12/16/93: Version ' 2.1'. c Adapted from the sort routine in LANSO. c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: sortr.F SID: 2.3 DATE OF SID: 4/19/96 RELEASE: 2 c c\EndLib @@ -86,7 +86,7 @@ subroutine ssortr (which, apply, n, x1, x2) c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'SA') then c c X1 is sorted into decreasing order of algebraic. @@ -158,7 +158,7 @@ subroutine ssortr (which, apply, n, x1, x2) 80 continue c if (j.lt.0) go to 90 -c +c if (x1(j).gt.x1(j+igap)) then temp = x1(j) x1(j) = x1(j+igap) @@ -176,7 +176,7 @@ subroutine ssortr (which, apply, n, x1, x2) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'LM') then c c X1 is sorted into increasing order of magnitude. diff --git a/Toolbox/arpack-src/sstatn.f b/Toolbox/arpack-src/sstatn.f index fba67fdb6..f3288c1ab 100644 --- a/Toolbox/arpack-src/sstatn.f +++ b/Toolbox/arpack-src/sstatn.f @@ -9,10 +9,10 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: statn.F SID: 2.4 DATE OF SID: 4/20/96 RELEASE: 2 c subroutine sstatn @@ -21,8 +21,8 @@ subroutine sstatn c | See stat.doc for documentation | c %--------------------------------% c - include 'stat.fi' -c + include 'stat.h' +c c %-----------------------% c | Executable Statements | c %-----------------------% @@ -32,7 +32,7 @@ subroutine sstatn nrorth = 0 nitref = 0 nrstrt = 0 -c +c tnaupd = 0.0E+0 tnaup2 = 0.0E+0 tnaitr = 0.0E+0 @@ -43,14 +43,14 @@ subroutine sstatn titref = 0.0E+0 tgetv0 = 0.0E+0 trvec = 0.0E+0 -c +c c %----------------------------------------------------% c | User time including reverse communication overhead | c %----------------------------------------------------% c tmvopx = 0.0E+0 tmvbx = 0.0E+0 -c +c return c c diff --git a/Toolbox/arpack-src/sstats.f b/Toolbox/arpack-src/sstats.f index fef9112c6..0822d3f3a 100644 --- a/Toolbox/arpack-src/sstats.f +++ b/Toolbox/arpack-src/sstats.f @@ -1,18 +1,18 @@ c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: stats.F SID: 2.1 DATE OF SID: 4/19/96 RELEASE: 2 c %---------------------------------------------% c | Initialize statistic and timing information | c | for symmetric Arnoldi code. | c %---------------------------------------------% - + subroutine sstats c %--------------------------------% c | See stat.doc for documentation | c %--------------------------------% - include 'stat.fi' - + include 'stat.h' + c %-----------------------% c | Executable Statements | c %-----------------------% @@ -22,7 +22,7 @@ subroutine sstats nrorth = 0 nitref = 0 nrstrt = 0 - + tsaupd = 0.0E+0 tsaup2 = 0.0E+0 tsaitr = 0.0E+0 @@ -33,13 +33,13 @@ subroutine sstats titref = 0.0E+0 tgetv0 = 0.0E+0 trvec = 0.0E+0 - + c %----------------------------------------------------% c | User time including reverse communication overhead | c %----------------------------------------------------% tmvopx = 0.0E+0 tmvbx = 0.0E+0 - + return c c End of sstats diff --git a/Toolbox/arpack-src/sstqrb.f b/Toolbox/arpack-src/sstqrb.f index 9fd1e1925..9697c3660 100644 --- a/Toolbox/arpack-src/sstqrb.f +++ b/Toolbox/arpack-src/sstqrb.f @@ -32,13 +32,13 @@ c On exit, E has been destroyed. c c Z Real array, dimension (N). (OUTPUT) -c On exit, Z contains the last row of the orthonormal -c eigenvector matrix of the symmetric tridiagonal matrix. +c On exit, Z contains the last row of the orthonormal +c eigenvector matrix of the symmetric tridiagonal matrix. c If an error exit is made, Z contains the last row of the c eigenvector matrix associated with the stored eigenvalues. c c WORK Real array, dimension (max(1,2*N-2)). (WORKSPACE) -c Workspace used in accumulating the transformation for +c Workspace used in accumulating the transformation for c computing the last components of the eigenvectors. c c INFO Integer. (OUTPUT) @@ -62,9 +62,9 @@ c scopy Level 1 BLAS that copies one vector to another. c sswap Level 1 BLAS that swaps the contents of two vectors. c lsame LAPACK character comparison routine. -c slae2 LAPACK routine that computes the eigenvalues of a 2-by-2 +c slae2 LAPACK routine that computes the eigenvalues of a 2-by-2 c symmetric matrix. -c slaev2 LAPACK routine that eigendecomposition of a 2-by-2 symmetric +c slaev2 LAPACK routine that eigendecomposition of a 2-by-2 symmetric c matrix. c slamch LAPACK routine that determines machine constants. c slanst LAPACK routine that computes the norm of a matrix. @@ -72,7 +72,7 @@ c slartg LAPACK Givens rotation construction routine. c slascl LAPACK routine for careful scaling of a matrix. c slaset LAPACK matrix initialization routine. -c slasr LAPACK routine that applies an orthogonal transformation to +c slasr LAPACK routine that applies an orthogonal transformation to c a matrix. c slasrt LAPACK sorting routine. c ssteqr LAPACK routine that computes eigenvalues and eigenvectors @@ -84,19 +84,19 @@ c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas c Applied Mathematics -c Rice University -c Houston, Texas +c Rice University +c Houston, Texas c -c\SCCS Information: @(#) +c\SCCS Information: @(#) c FILE: stqrb.F SID: 2.5 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks c 1. Starting with version 2.5, this routine is a modified version c of LAPACK version 2.0 subroutine SSTEQR. No lines are deleted, -c only commeted out and new lines inserted. +c only commented out and new lines inserted. c All lines commented out have "c$$$" at the beginning. c Note that the LAPACK version 1.0 subroutine SSTEQR contained -c bugs. +c bugs. c c\EndLib c @@ -118,9 +118,9 @@ subroutine sstqrb ( n, d, e, z, work, info ) & d( n ), e( n-1 ), z( n ), work( 2*n-2 ) c c .. parameters .. - Real + Real & zero, one, two, three - parameter ( zero = 0.0E+0, one = 1.0E+0, + parameter ( zero = 0.0E+0, one = 1.0E+0, & two = 2.0E+0, three = 3.0E+0 ) integer maxit parameter ( maxit = 30 ) @@ -129,7 +129,7 @@ subroutine sstqrb ( n, d, e, z, work, info ) integer i, icompz, ii, iscale, j, jtot, k, l, l1, lend, & lendm1, lendp1, lendsv, lm1, lsv, m, mm, mm1, & nm1, nmaxit - Real + Real & anorm, b, c, eps, eps2, f, g, p, r, rt1, rt2, & s, safmax, safmin, ssfmax, ssfmin, tst c .. @@ -380,9 +380,9 @@ subroutine sstqrb ( n, d, e, z, work, info ) c c *** New starting with version 2.5 *** c - call slasr( 'r', 'v', 'b', 1, mm, work( l ), + call slasr( 'r', 'v', 'b', 1, mm, work( l ), & work( n-1+l ), z( l ), 1 ) -c ************************************* +c ************************************* end if c d( l ) = d( l ) - p @@ -440,7 +440,7 @@ subroutine sstqrb ( n, d, e, z, work, info ) tst = z(l) z(l) = c*tst - s*z(l-1) z(l-1) = s*tst + c*z(l-1) -c ************************************* +c ************************************* else call slae2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2 ) end if @@ -502,7 +502,7 @@ subroutine sstqrb ( n, d, e, z, work, info ) c call slasr( 'r', 'v', 'f', 1, mm, work( m ), work( n-1+m ), & z( m ), 1 ) -c ************************************* +c ************************************* end if c d( l ) = d( l ) - p diff --git a/Toolbox/arpack-src/stat.fi b/Toolbox/arpack-src/stat.h similarity index 69% rename from Toolbox/arpack-src/stat.fi rename to Toolbox/arpack-src/stat.h index 1ca12fd20..66a8e9f87 100644 --- a/Toolbox/arpack-src/stat.fi +++ b/Toolbox/arpack-src/stat.h @@ -1,13 +1,21 @@ +c %--------------------------------% +c | See stat.doc for documentation | +c %--------------------------------% +c +c\SCCS Information: @(#) +c FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 +c real t0, t1, t2, t3, t4, t5 save t0, t1, t2, t3, t4, t5 +c integer nopx, nbx, nrorth, nitref, nrstrt real tsaupd, tsaup2, tsaitr, tseigt, tsgets, tsapps, tsconv, & tnaupd, tnaup2, tnaitr, tneigh, tngets, tnapps, tnconv, & tcaupd, tcaup2, tcaitr, tceigh, tcgets, tcapps, tcconv, & tmvopx, tmvbx, tgetv0, titref, trvec - common /timing/ + common /timing/ & nopx, nbx, nrorth, nitref, nrstrt, & tsaupd, tsaup2, tsaitr, tseigt, tsgets, tsapps, tsconv, & tnaupd, tnaup2, tnaitr, tneigh, tngets, tnapps, tnconv, & tcaupd, tcaup2, tcaitr, tceigh, tcgets, tcapps, tcconv, - & tmvopx, tmvbx, tgetv0, titref, trvec \ No newline at end of file + & tmvopx, tmvbx, tgetv0, titref, trvec diff --git a/Toolbox/arpack-src/version.h b/Toolbox/arpack-src/version.h new file mode 100644 index 000000000..ecdd9b340 --- /dev/null +++ b/Toolbox/arpack-src/version.h @@ -0,0 +1,30 @@ +/* + + In the current version, the parameter KAPPA in the Kahan's test + for orthogonality is set to 0.717, the same as used by Gragg & Reichel. + However computational experience indicates that this is a little too + strict and will frequently force reorthogonalization when it is not + necessary to do so. + + Also the "moving boundary" idea is not currently activated in the nonsymmetric + code since it is not conclusive that it's the right thing to do all the time. + Requires further investigation. + + As of 02/01/93 Richard Lehoucq assumes software control of the codes from + Phuong Vu. On 03/01/93 all the *.F files were migrated SCCS. The 1.1 version + of codes are those received from Phuong Vu. The frozen version of 07/08/92 + is now considered version 1.1. + + Version 2.1 contains two new symmetric routines, sesrt and seupd. + Changes as well as bug fixes for version 1.1 codes that were only corrected + for programming bugs are version 1.2. These 1.2 versions will also be in version 2.1. + Subroutine [d,s]saupd now requires slightly more workspace. See [d,s]saupd for the + details. + + \SCCS Information: @(#) + FILE: version.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 + + */ + +#define VERSION_NUMBER ' 2.1' +#define VERSION_DATE ' 11/15/95' diff --git a/Toolbox/arpack-src/zgetv0.f b/Toolbox/arpack-src/zgetv0.f index b0cb74719..cc13c3cfb 100644 --- a/Toolbox/arpack-src/zgetv0.f +++ b/Toolbox/arpack-src/zgetv0.f @@ -2,13 +2,13 @@ c c\Name: zgetv0 c -c\Description: +c\Description: c Generate a random initial residual vector for the Arnoldi process. -c Force the residual vector to be in the range of the operator OP. +c Force the residual vector to be in the range of the operator OP. c c\Usage: c call zgetv0 -c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, +c ( IDO, BMAT, ITRY, INITV, N, J, V, LDV, RESID, RNORM, c IPNTR, WORKD, IERR ) c c\Arguments @@ -35,7 +35,7 @@ c B = 'G' -> generalized eigenvalue problem A*x = lambda*B*x c c ITRY Integer. (INPUT) -c ITRY counts the number of times that zgetv0 is called. +c ITRY counts the number of times that zgetv0 is called. c It should be set to 1 on the initial call to zgetv0. c c INITV Logical variable. (INPUT) @@ -54,11 +54,11 @@ c if this is a "restart". c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c RESID Complex*16 array of length N. (INPUT/OUTPUT) -c Initial residual vector to be generated. If RESID is +c Initial residual vector to be generated. If RESID is c provided, force RESID into the range of the operator OP. c c RNORM Double precision scalar. (OUTPUT) @@ -89,21 +89,21 @@ c pp 357-385. c c\Routines called: -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c zvout ARPACK utility routine that prints vectors. -c zlarnv LAPACK routine for generating a random vector. +c zlarnv LAPACK routine for generating a random vector. c zgemv Level 2 BLAS routine for matrix vector multiplication. c zcopy Level 1 BLAS that copies one vector to another. c zdotc Level 1 BLAS that computes the scalar product of two vectors. -c dznrm2 Level 1 BLAS that computes the norm of a vector. +c dznrm2 Level 1 BLAS that computes the norm of a vector. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: getv0.F SID: 2.3 DATE OF SID: 08/27/96 RELEASE: 2 @@ -112,16 +112,16 @@ c c----------------------------------------------------------------------- c - subroutine zgetv0 - & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, + subroutine zgetv0 + & ( ido, bmat, itry, initv, n, j, v, ldv, resid, rnorm, & ipntr, workd, ierr ) -c +c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -156,35 +156,29 @@ subroutine zgetv0 c | Local Scalars & Arrays | c %------------------------% c - logical first, inits, orth + logical first, orth integer idist, iseed(4), iter, msglvl, jj Double precision & rnorm0 Complex*16 & cnorm - save first, iseed, inits, iter, msglvl, orth, rnorm0 + save first, iseed, iter, msglvl, orth, rnorm0 c c %----------------------% c | External Subroutines | c %----------------------% c - external zcopy, zgemv, zlarnv, zvout, second + external zcopy, zgemv, zlarnv, zvout, arscnd c c %--------------------% c | External Functions | c %--------------------% c - Double precision + Double precision & dznrm2, dlapy2 Complex*16 - & zdotc - external zdotc, dznrm2, dlapy2 -c -c %-----------------% -c | Data Statements | -c %-----------------% -c - data inits /.true./ + & zzdotc + external zzdotc, dznrm2, dlapy2 c c %-----------------------% c | Executable Statements | @@ -196,24 +190,21 @@ subroutine zgetv0 c | random number generator | c %-----------------------------------% c - if (inits) then - iseed(1) = 1 - iseed(2) = 3 - iseed(3) = 5 - iseed(4) = 7 - inits = .false. - end if + iseed(1) = 1 + iseed(2) = 3 + iseed(3) = 5 + iseed(4) = 7 c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mgetv0 -c +c ierr = 0 iter = 0 first = .FALSE. @@ -232,48 +223,50 @@ subroutine zgetv0 idist = 2 call zlarnv (idist, iseed, n, resid) end if -c +c c %----------------------------------------------------------% c | Force the starting vector into the range of OP to handle | c | the generalized problem when B is possibly (singular). | c %----------------------------------------------------------% c - call second (t2) - if (bmat .eq. 'G') then + call arscnd (t2) + if (itry .eq. 1) then nopx = nopx + 1 ipntr(1) = 1 ipntr(2) = n + 1 call zcopy (n, resid, 1, workd, 1) ido = -1 go to 9000 + else if (itry .gt. 1 .and. bmat .eq. 'G') then + call zcopy (n, resid, 1, workd(n + 1), 1) end if end if -c +c c %----------------------------------------% -c | Back from computing B*(initial-vector) | +c | Back from computing OP*(initial-vector) | c %----------------------------------------% c if (first) go to 20 c c %-----------------------------------------------% -c | Back from computing B*(orthogonalized-vector) | +c | Back from computing OP*(orthogonalized-vector) | c %-----------------------------------------------% c if (orth) go to 40 -c - call second (t3) +c + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) -c +c c %------------------------------------------------------% c | Starting vector is now in the range of OP; r = OP*r; | c | Compute B-norm of starting vector. | c %------------------------------------------------------% c - call second (t2) + call arscnd (t2) first = .TRUE. + if (itry .eq. 1) call zcopy (n, workd(n + 1), 1, resid, 1) if (bmat .eq. 'G') then nbx = nbx + 1 - call zcopy (n, workd(n+1), 1, resid, 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 @@ -281,18 +274,18 @@ subroutine zgetv0 else if (bmat .eq. 'I') then call zcopy (n, resid, 1, workd, 1) end if -c +c 20 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c first = .FALSE. if (bmat .eq. 'G') then - cnorm = zdotc (n, resid, 1, workd, 1) - rnorm0 = sqrt(dlapy2(dble(cnorm),dimag(cnorm))) + cnorm = zzdotc (n, resid, 1, workd, 1) + rnorm0 = sqrt(dlapy2(dble(cnorm),aimag(cnorm))) else if (bmat .eq. 'I') then rnorm0 = dznrm2(n, resid, 1) end if @@ -303,7 +296,7 @@ subroutine zgetv0 c %---------------------------------------------% c if (j .eq. 1) go to 50 -c +c c %---------------------------------------------------------------- c | Otherwise need to B-orthogonalize the starting vector against | c | the current Arnoldi basis using Gram-Schmidt with iter. ref. | @@ -319,16 +312,16 @@ subroutine zgetv0 orth = .TRUE. 30 continue c - call zgemv ('C', n, j-1, one, v, ldv, workd, 1, + call zgemv ('C', n, j-1, one, v, ldv, workd, 1, & zero, workd(n+1), 1) - call zgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, + call zgemv ('N', n, j-1, -one, v, ldv, workd(n+1), 1, & one, resid, 1) -c +c c %----------------------------------------------------------% c | Compute the B-norm of the orthogonalized starting vector | c %----------------------------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call zcopy (n, resid, 1, workd(n+1), 1) @@ -339,17 +332,17 @@ subroutine zgetv0 else if (bmat .eq. 'I') then call zcopy (n, resid, 1, workd, 1) end if -c +c 40 continue c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c if (bmat .eq. 'G') then - cnorm = zdotc (n, resid, 1, workd, 1) - rnorm = sqrt(dlapy2(dble(cnorm),dimag(cnorm))) + cnorm = zzdotc (n, resid, 1, workd, 1) + rnorm = sqrt(dlapy2(dble(cnorm),aimag(cnorm))) else if (bmat .eq. 'I') then rnorm = dznrm2(n, resid, 1) end if @@ -359,14 +352,14 @@ subroutine zgetv0 c %--------------------------------------% c if (msglvl .gt. 2) then - call dvout (logfil, 1, rnorm0, ndigit, + call dvout (logfil, 1, [rnorm0], ndigit, & '_getv0: re-orthonalization ; rnorm0 is') - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_getv0: re-orthonalization ; rnorm is') end if c if (rnorm .gt. 0.717*rnorm0) go to 50 -c +c iter = iter + 1 if (iter .le. 1) then c @@ -388,11 +381,11 @@ subroutine zgetv0 rnorm = rzero ierr = -1 end if -c +c 50 continue c if (msglvl .gt. 0) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_getv0: B-norm of initial / restarted starting vector') end if if (msglvl .gt. 2) then @@ -400,10 +393,10 @@ subroutine zgetv0 & '_getv0: initial / restarted starting vector') end if ido = 99 -c - call second (t1) +c + call arscnd (t1) tgetv0 = tgetv0 + (t1 - t0) -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/zmout.f b/Toolbox/arpack-src/zmout.f index 9877aa833..c39f6defe 100644 --- a/Toolbox/arpack-src/zmout.f +++ b/Toolbox/arpack-src/zmout.f @@ -74,34 +74,34 @@ SUBROUTINE ZMOUT( LOUT, M, N, A, LDA, IDIGIT, IFMT ) IF (K1.NE.N) THEN WRITE( LOUT, 9994 )I, ( A( I, J ), J = K1, K2 ) ELSE - WRITE( LOUT, 9984 )I, ( A( I, J ), J = K1, K2 ) + WRITE( LOUT, 9984 )I, ( A( I, J ), J = K1, K2 ) END IF 30 CONTINUE 40 CONTINUE * ELSE IF( NDIGIT.LE.6 ) THEN - DO 60 K1 = 1, N, 2 + DO 60 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) WRITE( LOUT, 9997 )( ICOL, I, I = K1, K2 ) DO 50 I = 1, M IF (K1.NE.N) THEN WRITE( LOUT, 9993 )I, ( A( I, J ), J = K1, K2 ) - ELSE - WRITE( LOUT, 9983 )I, ( A( I, J ), J = K1, K2 ) + ELSE + WRITE( LOUT, 9983 )I, ( A( I, J ), J = K1, K2 ) END IF 50 CONTINUE 60 CONTINUE * ELSE IF( NDIGIT.LE.8 ) THEN - DO 80 K1 = 1, N, 2 + DO 80 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) WRITE( LOUT, 9996 )( ICOL, I, I = K1, K2 ) DO 70 I = 1, M IF (K1.NE.N) THEN WRITE( LOUT, 9992 )I, ( A( I, J ), J = K1, K2 ) ELSE - WRITE( LOUT, 9982 )I, ( A( I, J ), J = K1, K2 ) - END IF + WRITE( LOUT, 9982 )I, ( A( I, J ), J = K1, K2 ) + END IF 70 CONTINUE 80 CONTINUE * @@ -124,20 +124,20 @@ SUBROUTINE ZMOUT( LOUT, M, N, A, LDA, IDIGIT, IFMT ) K2 = MIN0( N, K1+3 ) WRITE( LOUT, 9998 )( ICOL, I, I = K1, K2 ) DO 110 I = 1, M - IF ((K1+3).LE.N) THEN + IF ((K1+3).LE.N) THEN WRITE( LOUT, 9974 )I, ( A( I, J ), J = K1, K2 ) ELSE IF ((K1+3-N).EQ.1) THEN WRITE( LOUT, 9964 )I, ( A( I, J ), J = k1, K2 ) ELSE IF ((K1+3-N).EQ.2) THEN WRITE( LOUT, 9954 )I, ( A( I, J ), J = K1, K2 ) ELSE IF ((K1+3-N).EQ.3) THEN - WRITE( LOUT, 9944 )I, ( A( I, J ), J = K1, K2 ) + WRITE( LOUT, 9944 )I, ( A( I, J ), J = K1, K2 ) END IF 110 CONTINUE 120 CONTINUE * ELSE IF( NDIGIT.LE.6 ) THEN - DO 140 K1 = 1, N, 3 + DO 140 K1 = 1, N, 3 K2 = MIN0( N, K1+ 2) WRITE( LOUT, 9997 )( ICOL, I, I = K1, K2 ) DO 130 I = 1, M @@ -185,14 +185,14 @@ SUBROUTINE ZMOUT( LOUT, M, N, A, LDA, IDIGIT, IFMT ) 9998 FORMAT( 11X, 4( 9X, 3A1, I4, 9X ) ) 9997 FORMAT( 10X, 4( 11X, 3A1, I4, 11X ) ) 9996 FORMAT( 10X, 3( 13X, 3A1, I4, 13X ) ) - 9995 FORMAT( 12X, 2( 18x, 3A1, I4, 18X ) ) + 9995 FORMAT( 12X, 2( 18x, 3A1, I4, 18X ) ) * *======================================================== * FORMAT FOR 72 COLUMN *======================================================== * * DISPLAY 4 SIGNIFICANT DIGITS -* +* 9994 FORMAT( 1X, ' Row', I4, ':', 1X, 1P,2('(',D10.3,',',D10.3,') ') ) 9984 FORMAT( 1X, ' Row', I4, ':', 1X, 1P,1('(',D10.3,',',D10.3,') ') ) * diff --git a/Toolbox/arpack-src/znaitr.f b/Toolbox/arpack-src/znaitr.f index 3b64ad4f2..240412ca0 100644 --- a/Toolbox/arpack-src/znaitr.f +++ b/Toolbox/arpack-src/znaitr.f @@ -2,8 +2,8 @@ c c\Name: znaitr c -c\Description: -c Reverse communication interface for applying NP additional steps to +c\Description: +c Reverse communication interface for applying NP additional steps to c a K step nonsymmetric Arnoldi factorization. c c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T @@ -19,7 +19,7 @@ c c\Usage: c call znaitr -c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, +c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH, c IPNTR, WORKD, INFO ) c c\Arguments @@ -61,8 +61,8 @@ c Number of additional Arnoldi steps to take. c c NB Integer. (INPUT) -c Blocksize to be used in the recurrence. -c Only work for NB = 1 right now. The goal is to have a +c Blocksize to be used in the recurrence. +c Only work for NB = 1 right now. The goal is to have a c program that implement both the block and non-block method. c c RESID Complex*16 array of length N. (INPUT/OUTPUT) @@ -74,37 +74,37 @@ c B-norm of the updated residual r_{k+p} on output. c c V Complex*16 N by K+NP array. (INPUT/OUTPUT) -c On INPUT: V contains the Arnoldi vectors in the first K +c On INPUT: V contains the Arnoldi vectors in the first K c columns. c On OUTPUT: V contains the new NP Arnoldi vectors in the next c NP columns. The first K columns are unchanged. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Complex*16 (K+NP) by (K+NP) array. (INPUT/OUTPUT) c H is used to store the generated upper Hessenberg matrix. c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORK for +c Pointer to mark the starting locations in the WORK for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Complex*16 work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The calling program should not +c for reverse communication. The calling program should not c use WORKD as temporary workspace during the iteration !!!!!! -c On input, WORKD(1:N) = B*RESID and is used to save some +c On input, WORKD(1:N) = B*RESID and is used to save some c computation at the first step. c c INFO Integer. (OUTPUT) @@ -124,14 +124,14 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: c zgetv0 ARPACK routine to generate the initial vector. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c zmout ARPACK utility routine that prints matrices c zvout ARPACK utility routine that prints vectors. c zlanhs LAPACK routine that computes various norms of a matrix. @@ -143,29 +143,29 @@ c zgemv Level 2 BLAS routine for matrix vector multiplication. c zaxpy Level 1 BLAS that computes a vector triad. c zcopy Level 1 BLAS that copies one vector to another . -c zdotc Level 1 BLAS that computes the scalar product of two vectors. +c zdotc Level 1 BLAS that computes the scalar product of two vectors. c zscal Level 1 BLAS that scales a vector. -c zdscal Level 1 BLAS that scales a complex vector by a real number. +c zdscal Level 1 BLAS that scales a complex vector by a real number. c dznrm2 Level 1 BLAS that computes the norm of a vector. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University -c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas -c +c Dept. of Computational & Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas +c c\SCCS Information: @(#) c FILE: naitr.F SID: 2.3 DATE OF SID: 8/27/96 RELEASE: 2 c c\Remarks c The algorithm implemented is: -c +c c restart = .false. -c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; +c Given V_{k} = [v_{1}, ..., v_{k}], r_{k}; c r_{k} contains the initial residual vector even for k = 0; -c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already +c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already c computed by the calling program. c c betaj = rnorm ; p_{k+1} = B*r_{k} ; @@ -173,7 +173,7 @@ c 1) if ( betaj < tol ) stop or restart depending on j. c ( At present tol is zero ) c if ( restart ) generate a new starting vector. -c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; +c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}]; c p_{j} = p_{j}/betaj c 3) r_{j} = OP*v_{j} where OP is defined as in znaupd c For shift-invert mode p_{j} = B*v_{j} is already available. @@ -188,7 +188,7 @@ c 5) Re-orthogonalization step: c s = V_{j}'*B*r_{j} c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} || -c alphaj = alphaj + s_{j}; +c alphaj = alphaj + s_{j}; c 6) Iterative refinement step: c If (rnorm1 > 0.717*rnorm) then c rnorm = rnorm1 @@ -198,7 +198,7 @@ c If this is the first time in step 6), go to 5) c Else r_{j} lies in the span of V_{j} numerically. c Set r_{j} = 0 and rnorm = 0; go to 1) -c EndIf +c EndIf c End Do c c\EndLib @@ -206,15 +206,15 @@ c----------------------------------------------------------------------- c subroutine znaitr - & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, + & (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh, & ipntr, workd, info) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -241,7 +241,7 @@ subroutine znaitr & one, zero Double precision & rone, rzero - parameter (one = (1.0D+0, 0.0D+0), zero = (0.0D+0, 0.0D+0), + parameter (one = (1.0D+0, 0.0D+0), zero = (0.0D+0, 0.0D+0), & rone = 1.0D+0, rzero = 0.0D+0) c c %--------------% @@ -258,7 +258,7 @@ subroutine znaitr logical first, orth1, orth2, rstart, step3, step4 integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl, & jj - Double precision + Double precision & ovfl, smlnum, tst1, ulp, unfl, betaj, & temp1, rnorm1, wnorm Complex*16 @@ -272,24 +272,24 @@ subroutine znaitr c | External Subroutines | c %----------------------% c - external zaxpy, zcopy, zscal, zdscal, zgemv, zgetv0, - & dlabad, zvout, zmout, ivout, second + external zaxpy, zcopy, zscal, zdscal, zgemv, zgetv0, + & dlabad, zvout, zmout, ivout, arscnd c c %--------------------% c | External Functions | c %--------------------% c Complex*16 - & zdotc - Double precision + & zzdotc + Double precision & dlamch, dznrm2, zlanhs, dlapy2 - external zdotc, dznrm2, zlanhs, dlamch, dlapy2 + external zzdotc, dznrm2, zlanhs, dlamch, dlapy2 c c %---------------------% c | Intrinsic Functions | c %---------------------% c - intrinsic dimag, dble, max, sqrt + intrinsic aimag, dble, max, sqrt c c %-----------------% c | Data statements | @@ -320,15 +320,15 @@ subroutine znaitr end if c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mcaitr -c +c c %------------------------------% c | Initial call to this routine | c %------------------------------% @@ -344,7 +344,7 @@ subroutine znaitr irj = ipj + n ivj = irj + n end if -c +c c %-------------------------------------------------% c | When in reverse communication mode one of: | c | STEP3, STEP4, ORTH1, ORTH2, RSTART | @@ -374,16 +374,16 @@ subroutine znaitr c | | c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | c %--------------------------------------------------------------% - + 1000 continue c if (msglvl .gt. 1) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: generating Arnoldi vector number') - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_naitr: B-norm of the current residual is') end if -c +c c %---------------------------------------------------% c | STEP 1: Check if the B norm of j-th residual | c | vector is zero. Equivalent to determine whether | @@ -400,16 +400,16 @@ subroutine znaitr c %---------------------------------------------------% c if (msglvl .gt. 0) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: ****** RESTART AT STEP ******') end if -c +c c %---------------------------------------------% c | ITRY is the loop variable that controls the | c | maximum amount of times that a restart is | c | attempted. NRSTRT is used by stat.h | c %---------------------------------------------% -c +c betaj = rzero nrstrt = nrstrt + 1 itry = 1 @@ -423,7 +423,7 @@ subroutine znaitr c | RSTART = .true. flow returns here. | c %--------------------------------------% c - call zgetv0 (ido, bmat, itry, .false., n, j, v, ldv, + call zgetv0 (ido, bmat, itry, .false., n, j, v, ldv, & resid, rnorm, ipntr, workd, ierr) if (ido .ne. 99) go to 9000 if (ierr .lt. 0) then @@ -437,12 +437,12 @@ subroutine znaitr c %------------------------------------------------% c info = j - 1 - call second (t1) + call arscnd (t1) tcaitr = tcaitr + (t1 - t0) ido = 99 go to 9000 end if -c +c 40 continue c c %---------------------------------------------------------% @@ -466,7 +466,7 @@ subroutine znaitr c call zlascl ('General', i, i, rnorm, rone, & n, 1, v(1,j), n, infol) - call zlascl ('General', i, i, rnorm, rone, + call zlascl ('General', i, i, rnorm, rone, & n, 1, workd(ipj), n, infol) end if c @@ -477,29 +477,29 @@ subroutine znaitr c step3 = .true. nopx = nopx + 1 - call second (t2) + call arscnd (t2) call zcopy (n, v(1,j), 1, workd(ivj), 1) ipntr(1) = ivj ipntr(2) = irj ipntr(3) = ipj ido = 1 -c +c c %-----------------------------------% c | Exit in order to compute OP*v_{j} | c %-----------------------------------% -c - go to 9000 +c + go to 9000 50 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | c | if step3 = .true. | c %----------------------------------% c - call second (t3) + call arscnd (t3) tmvopx = tmvopx + (t3 - t2) - + step3 = .false. c c %------------------------------------------% @@ -507,30 +507,30 @@ subroutine znaitr c %------------------------------------------% c call zcopy (n, workd(irj), 1, resid, 1) -c +c c %---------------------------------------% c | STEP 4: Finish extending the Arnoldi | c | factorization to length j. | c %---------------------------------------% c - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 step4 = .true. ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-------------------------------------% c | Exit in order to compute B*OP*v_{j} | c %-------------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call zcopy (n, resid, 1, workd(ipj), 1) end if 60 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | @@ -538,10 +538,10 @@ subroutine znaitr c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c step4 = .false. c c %-------------------------------------% @@ -549,9 +549,9 @@ subroutine znaitr c | Compute the B-norm of OP*v_{j}. | c %-------------------------------------% c - if (bmat .eq. 'G') then - cnorm = zdotc (n, resid, 1, workd(ipj), 1) - wnorm = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) ) + if (bmat .eq. 'G') then + cnorm = zzdotc (n, resid, 1, workd(ipj), 1) + wnorm = sqrt( dlapy2(dble(cnorm),aimag(cnorm)) ) else if (bmat .eq. 'I') then wnorm = dznrm2(n, resid, 1) end if @@ -569,65 +569,65 @@ subroutine znaitr c | Compute the j Fourier coefficients w_{j} | c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | c %------------------------------------------% -c +c call zgemv ('C', n, j, one, v, ldv, workd(ipj), 1, & zero, h(1,j), 1) c c %--------------------------------------% c | Orthogonalize r_{j} against V_{j}. | -c | RESID contains OP*v_{j}. See STEP 3. | +c | RESID contains OP*v_{j}. See STEP 3. | c %--------------------------------------% c call zgemv ('N', n, j, -one, v, ldv, h(1,j), 1, & one, resid, 1) c - if (j .gt. 1) h(j,j-1) = dcmplx(betaj, rzero) + if (j .gt. 1) h(j,j-1) = cmplx(betaj, rzero, Kind=Kind(0d0)) +c + call arscnd (t4) c - call second (t4) -c orth1 = .true. -c - call second (t2) +c + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call zcopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*r_{j} | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call zcopy (n, resid, 1, workd(ipj), 1) - end if + end if 70 continue -c +c c %---------------------------------------------------% c | Back from reverse communication if ORTH1 = .true. | c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c +c orth1 = .false. c c %------------------------------% c | Compute the B-norm of r_{j}. | c %------------------------------% c - if (bmat .eq. 'G') then - cnorm = zdotc (n, resid, 1, workd(ipj), 1) - rnorm = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) ) + if (bmat .eq. 'G') then + cnorm = zzdotc (n, resid, 1, workd(ipj), 1) + rnorm = sqrt( dlapy2(dble(cnorm),aimag(cnorm)) ) else if (bmat .eq. 'I') then rnorm = dznrm2(n, resid, 1) end if -c +c c %-----------------------------------------------------------% c | STEP 5: Re-orthogonalization / Iterative refinement phase | c | Maximum NITER_ITREF tries. | @@ -650,20 +650,20 @@ subroutine znaitr c iter = 0 nrorth = nrorth + 1 -c +c c %---------------------------------------------------% c | Enter the Iterative refinement phase. If further | c | refinement is necessary, loop back here. The loop | c | variable is ITER. Perform a step of Classical | c | Gram-Schmidt using all the Arnoldi vectors V_{j} | c %---------------------------------------------------% -c +c 80 continue c if (msglvl .gt. 2) then rtemp(1) = wnorm rtemp(2) = rnorm - call dvout (logfil, 2, rtemp, ndigit, + call dvout (logfil, 2, rtemp, ndigit, & '_naitr: re-orthogonalization; wnorm and rnorm are') call zvout (logfil, j, h(1,j), ndigit, & '_naitr: j-th column of H') @@ -674,7 +674,7 @@ subroutine znaitr c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | c %----------------------------------------------------% c - call zgemv ('C', n, j, one, v, ldv, workd(ipj), 1, + call zgemv ('C', n, j, one, v, ldv, workd(ipj), 1, & zero, workd(irj), 1) c c %---------------------------------------------% @@ -684,28 +684,28 @@ subroutine znaitr c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | c %---------------------------------------------% c - call zgemv ('N', n, j, -one, v, ldv, workd(irj), 1, + call zgemv ('N', n, j, -one, v, ldv, workd(irj), 1, & one, resid, 1) call zaxpy (j, one, workd(irj), 1, h(1,j), 1) -c +c orth2 = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call zcopy (n, resid, 1, workd(irj), 1) ipntr(1) = irj ipntr(2) = ipj ido = 2 -c +c c %-----------------------------------% c | Exit in order to compute B*r_{j}. | c | r_{j} is the corrected residual. | c %-----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call zcopy (n, resid, 1, workd(ipj), 1) - end if + end if 90 continue c c %---------------------------------------------------% @@ -713,23 +713,23 @@ subroutine znaitr c %---------------------------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) - end if + end if c c %-----------------------------------------------------% c | Compute the B-norm of the corrected residual r_{j}. | c %-----------------------------------------------------% -c - if (bmat .eq. 'G') then - cnorm = zdotc (n, resid, 1, workd(ipj), 1) - rnorm1 = sqrt( dlapy2(dble(cnorm),dimag(cnorm)) ) +c + if (bmat .eq. 'G') then + cnorm = zzdotc (n, resid, 1, workd(ipj), 1) + rnorm1 = sqrt( dlapy2(dble(cnorm),aimag(cnorm)) ) else if (bmat .eq. 'I') then rnorm1 = dznrm2(n, resid, 1) end if -c +c if (msglvl .gt. 0 .and. iter .gt. 0 ) then - call ivout (logfil, 1, j, ndigit, + call ivout (logfil, 1, [j], ndigit, & '_naitr: Iterative refinement for Arnoldi residual') if (msglvl .gt. 2) then rtemp(1) = rnorm @@ -757,7 +757,7 @@ subroutine znaitr c %---------------------------------------% c rnorm = rnorm1 -c +c else c c %-------------------------------------------% @@ -776,55 +776,55 @@ subroutine znaitr c do 95 jj = 1, n resid(jj) = zero - 95 continue + 95 continue rnorm = rzero end if -c +c c %----------------------------------------------% c | Branch here directly if iterative refinement | c | wasn't necessary or after at most NITER_REF | c | steps of iterative refinement. | c %----------------------------------------------% -c +c 100 continue -c +c rstart = .false. orth2 = .false. -c - call second (t5) +c + call arscnd (t5) titref = titref + (t5 - t4) -c +c c %------------------------------------% c | STEP 6: Update j = j+1; Continue | c %------------------------------------% c j = j + 1 if (j .gt. k+np) then - call second (t1) + call arscnd (t1) tcaitr = tcaitr + (t1 - t0) ido = 99 do 110 i = max(1,k), k+np-1 -c +c c %--------------------------------------------% c | Check for splitting and deflation. | c | Use a standard test as in the QR algorithm | c | REFERENCE: LAPACK subroutine zlahqr | c %--------------------------------------------% -c - tst1 = dlapy2(dble(h(i,i)),dimag(h(i,i))) - & + dlapy2(dble(h(i+1,i+1)), dimag(h(i+1,i+1))) +c + tst1 = dlapy2(dble(h(i,i)),aimag(h(i,i))) + & + dlapy2(dble(h(i+1,i+1)), aimag(h(i+1,i+1))) if( tst1.eq.dble(zero) ) & tst1 = zlanhs( '1', k+np, h, ldh, workd(n+1) ) - if( dlapy2(dble(h(i+1,i)),dimag(h(i+1,i))) .le. - & max( ulp*tst1, smlnum ) ) + if( dlapy2(dble(h(i+1,i)),aimag(h(i+1,i))) .le. + & max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 110 continue -c +c if (msglvl .gt. 2) then - call zmout (logfil, k+np, k+np, h, ldh, ndigit, + call zmout (logfil, k+np, k+np, h, ldh, ndigit, & '_naitr: Final upper Hessenberg matrix H of order K+NP') end if -c +c go to 9000 end if c @@ -833,7 +833,7 @@ subroutine znaitr c %--------------------------------------------------------% c go to 1000 -c +c c %---------------------------------------------------------------% c | | c | E N D O F M A I N I T E R A T I O N L O O P | diff --git a/Toolbox/arpack-src/znapps.f b/Toolbox/arpack-src/znapps.f index 48f421346..792fe6168 100644 --- a/Toolbox/arpack-src/znapps.f +++ b/Toolbox/arpack-src/znapps.f @@ -19,7 +19,7 @@ c c\Usage: c call znapps -c ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, +c ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, c WORKL, WORKD ) c c\Arguments @@ -28,7 +28,7 @@ c c KEV Integer. (INPUT/OUTPUT) c KEV+NP is the size of the input matrix H. -c KEV is the size of the updated matrix HNEW. +c KEV is the size of the updated matrix HNEW. c c NP Integer. (INPUT) c Number of implicit shifts to be applied. @@ -46,7 +46,7 @@ c program. c c H Complex*16 (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT) -c On INPUT, H contains the current KEV+NP by KEV+NP upper +c On INPUT, H contains the current KEV+NP by KEV+NP upper c Hessenberg matrix of the Arnoldi factorization. c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg c matrix in the KEV leading submatrix. @@ -57,7 +57,7 @@ c c RESID Complex*16 array of length N. (INPUT/OUTPUT) c On INPUT, RESID contains the the residual vector r_{k+p}. -c On OUTPUT, RESID is the update residual vector rnew_{k} +c On OUTPUT, RESID is the update residual vector rnew_{k} c in the first KEV locations. c c Q Complex*16 KEV+NP by KEV+NP work array. (WORKSPACE) @@ -92,7 +92,7 @@ c c\Routines called: c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c zmout ARPACK utility routine that prints matrices c zvout ARPACK utility routine that prints vectors. c zlacpy LAPACK matrix copy routine. @@ -112,9 +112,9 @@ c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: napps.F SID: 2.3 DATE OF SID: 3/28/97 RELEASE: 2 @@ -132,15 +132,15 @@ c----------------------------------------------------------------------- c subroutine znapps - & ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, + & ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, & workl, workd ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -153,7 +153,7 @@ subroutine znapps c %-----------------% c Complex*16 - & h(ldh,kev+np), resid(n), shift(np), + & h(ldh,kev+np), resid(n), shift(np), & v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np) c c %------------% @@ -175,22 +175,22 @@ subroutine znapps logical first Complex*16 & cdum, f, g, h11, h21, r, s, sigma, t - Double precision + Double precision & c, ovfl, smlnum, ulp, unfl, tst1 - save first, ovfl, smlnum, ulp, unfl + save first, ovfl, smlnum, ulp, unfl c c %----------------------% c | External Subroutines | c %----------------------% c - external zaxpy, zcopy, zgemv, zscal, zlacpy, zlartg, - & zvout, zlaset, dlabad, zmout, second, ivout + external zaxpy, zcopy, zgemv, zscal, zlacpy, zlartg, + & zvout, zlaset, dlabad, zmout, arscnd, ivout c c %--------------------% c | External Functions | c %--------------------% c - Double precision + Double precision & zlanhs, dlamch, dlapy2 external zlanhs, dlamch, dlapy2 c @@ -198,18 +198,18 @@ subroutine znapps c | Intrinsics Functions | c %----------------------% c - intrinsic abs, dimag, conjg, dcmplx, max, min, dble + intrinsic abs, aimag, conjg, cmplx, max, min, dble c c %---------------------% c | Statement Functions | c %---------------------% c - Double precision + Double precision & zabs1 - zabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) ) + zabs1( cdum ) = abs( dble( cdum ) ) + abs( aimag( cdum ) ) c c %----------------% -c | Data statments | +c | Data statements | c %----------------% c data first / .true. / @@ -240,11 +240,11 @@ subroutine znapps c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mcapps -c - kplusp = kev + np -c +c + kplusp = kev + np +c c %--------------------------------------------% c | Initialize Q to the identity to accumulate | c | the rotations and reflections | @@ -268,9 +268,9 @@ subroutine znapps sigma = shift(jj) c if (msglvl .gt. 2 ) then - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: shift number.') - call zvout (logfil, 1, sigma, ndigit, + call zvout (logfil, 1, [sigma], ndigit, & '_napps: Value of the shift ') end if c @@ -288,14 +288,14 @@ subroutine znapps tst1 = zabs1( h( i, i ) ) + zabs1( h( i+1, i+1 ) ) if( tst1.eq.rzero ) & tst1 = zlanhs( '1', kplusp-jj+1, h, ldh, workl ) - if ( abs(dble(h(i+1,i))) + if ( abs(dble(h(i+1,i))) & .le. max(ulp*tst1, smlnum) ) then if (msglvl .gt. 0) then - call ivout (logfil, 1, i, ndigit, + call ivout (logfil, 1, [i], ndigit, & '_napps: matrix splitting at row/column no.') - call ivout (logfil, 1, jj, ndigit, + call ivout (logfil, 1, [jj], ndigit, & '_napps: matrix splitting with shift number.') - call zvout (logfil, 1, h(i+1,i), ndigit, + call zvout (logfil, 1, h(i+1,i), ndigit, & '_napps: off diagonal element.') end if iend = i @@ -307,9 +307,9 @@ subroutine znapps 40 continue c if (msglvl .gt. 2) then - call ivout (logfil, 1, istart, ndigit, + call ivout (logfil, 1, [istart], ndigit, & '_napps: Start of current block ') - call ivout (logfil, 1, iend, ndigit, + call ivout (logfil, 1, [iend], ndigit, & '_napps: End of current block ') end if c @@ -325,7 +325,7 @@ subroutine znapps h21 = h(istart+1,istart) f = h11 - sigma g = h21 -c +c do 80 i = istart, iend-1 c c %------------------------------------------------------% @@ -345,7 +345,7 @@ subroutine znapps do 50 j = i, kplusp t = c*h(i,j) + s*h(i+1,j) h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j) - h(i,j) = t + h(i,j) = t 50 continue c c %---------------------------------------------% @@ -355,7 +355,7 @@ subroutine znapps do 60 j = 1, min(i+2,iend) t = c*h(j,i) + conjg(s)*h(j,i+1) h(j,i+1) = -s*h(j,i) + c*h(j,i+1) - h(j,i) = t + h(j,i) = t 60 continue c c %-----------------------------------------------------% @@ -365,7 +365,7 @@ subroutine znapps do 70 j = 1, min(i+jj, kplusp) t = c*q(j,i) + conjg(s)*q(j,i+1) q(j,i+1) = - s*q(j,i) + c*q(j,i+1) - q(j,i) = t + q(j,i) = t 70 continue c c %---------------------------% @@ -381,7 +381,7 @@ subroutine znapps c %-------------------------------% c | Finished applying the shift. | c %-------------------------------% -c +c 100 continue c c %---------------------------------------------------------% @@ -405,12 +405,12 @@ subroutine znapps c do 120 j=1,kev if ( dble( h(j+1,j) ) .lt. rzero .or. - & dimag( h(j+1,j) ) .ne. rzero ) then - t = h(j+1,j) / dlapy2(dble(h(j+1,j)),dimag(h(j+1,j))) + & aimag( h(j+1,j) ) .ne. rzero ) then + t = h(j+1,j) / dlapy2(dble(h(j+1,j)),aimag(h(j+1,j))) call zscal( kplusp-j+1, conjg(t), h(j+1,j), ldh ) call zscal( min(j+2, kplusp), t, h(1,j+1), 1 ) call zscal( min(j+np+1,kplusp), t, q(1,j+1), 1 ) - h(j+1,j) = dcmplx( dble( h(j+1,j) ), rzero ) + h(j+1,j) = cmplx( dble( h(j+1,j) ), rzero, Kind=Kind(0d0) ) end if 120 continue c @@ -428,7 +428,7 @@ subroutine znapps tst1 = zabs1( h( i, i ) ) + zabs1( h( i+1, i+1 ) ) if( tst1 .eq. rzero ) & tst1 = zlanhs( '1', kev, h, ldh, workl ) - if( dble( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) ) + if( dble( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) ) & h(i+1,i) = zero 130 continue c @@ -441,9 +441,9 @@ subroutine znapps c %-------------------------------------------------% c if ( dble( h(kev+1,kev) ) .gt. rzero ) - & call zgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, + & call zgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, & workd(n+1), 1) -c +c c %----------------------------------------------------------% c | Compute column 1 to kev of (V*Q) in backward order | c | taking advantage of the upper Hessenberg structure of Q. | @@ -460,14 +460,14 @@ subroutine znapps c %-------------------------------------------------% c call zlacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv) -c +c c %--------------------------------------------------------------% c | Copy the (kev+1)-st column of (V*Q) in the appropriate place | c %--------------------------------------------------------------% c if ( dble( h(kev+1,kev) ) .gt. rzero ) & call zcopy (n, workd(n+1), 1, v(1,kev+1), 1) -c +c c %-------------------------------------% c | Update the residual vector: | c | r <- sigmak*r + betak*v(:,kev+1) | @@ -485,7 +485,7 @@ subroutine znapps & '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}') call zvout (logfil, 1, h(kev+1,kev), ndigit, & '_napps: betak = e_{kev+1}^T*H*e_{kev}') - call ivout (logfil, 1, kev, ndigit, + call ivout (logfil, 1, [kev], ndigit, & '_napps: Order of the final Hessenberg matrix ') if (msglvl .gt. 2) then call zmout (logfil, kev, kev, h, ldh, ndigit, @@ -495,9 +495,9 @@ subroutine znapps end if c 9000 continue - call second (t1) + call arscnd (t1) tcapps = tcapps + (t1 - t0) -c +c return c c %---------------% diff --git a/Toolbox/arpack-src/znaup2.f b/Toolbox/arpack-src/znaup2.f index 3d196a264..0ab01dd0e 100644 --- a/Toolbox/arpack-src/znaup2.f +++ b/Toolbox/arpack-src/znaup2.f @@ -1,14 +1,14 @@ c\BeginDoc c -c\Name: znaup2 +c\Name: znaup2 c -c\Description: +c\Description: c Intermediate level interface called by znaupd . c c\Usage: -c call znaup2 +c call znaup2 c ( IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD, -c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, +c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, c Q, LDQ, WORKL, IPNTR, WORKD, RWORK, INFO ) c c\Arguments @@ -26,7 +26,7 @@ c The logic for adjusting is contained within the current c subroutine. c If ISHIFT=0, NP is the number of shifts the user needs -c to provide via reverse comunication. 0 < NP < NCV-NEV. +c to provide via reverse communication. 0 < NP < NCV-NEV. c NP may be less than NCV-NEV since a leading block of the current c upper Hessenberg matrix has split off and contains "unwanted" c Ritz values. @@ -38,27 +38,27 @@ c IUPD .NE. 0: use implicit update. c c V Complex*16 N by (NEV+NP) array. (INPUT/OUTPUT) -c The Arnoldi basis vectors are returned in the first NEV +c The Arnoldi basis vectors are returned in the first NEV c columns of V. c c LDV Integer. (INPUT) -c Leading dimension of V exactly as declared in the calling +c Leading dimension of V exactly as declared in the calling c program. c c H Complex*16 (NEV+NP) by (NEV+NP) array. (OUTPUT) c H is used to store the generated upper Hessenberg matrix c c LDH Integer. (INPUT) -c Leading dimension of H exactly as declared in the calling +c Leading dimension of H exactly as declared in the calling c program. c c RITZ Complex*16 array of length NEV+NP. (OUTPUT) c RITZ(1:NEV) contains the computed Ritz values of OP. c c BOUNDS Complex*16 array of length NEV+NP. (OUTPUT) -c BOUNDS(1:NEV) contain the error bounds corresponding to +c BOUNDS(1:NEV) contain the error bounds corresponding to c the computed Ritz values. -c +c c Q Complex*16 (NEV+NP) by (NEV+NP) array. (WORKSPACE) c Private (replicated) work array used to accumulate the c rotation in the shift application step. @@ -67,7 +67,7 @@ c Leading dimension of Q exactly as declared in the calling c program. c -c WORKL Complex*16 work array of length at least +c WORKL Complex*16 work array of length at least c (NEV+NP)**2 + 3*(NEV+NP). (WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. It is used in shifts calculation, shifts @@ -75,15 +75,15 @@ c c c IPNTR Integer array of length 3. (OUTPUT) -c Pointer to mark the starting locations in the WORKD for +c Pointer to mark the starting locations in the WORKD for c vectors used by the Arnoldi iteration. c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X. c IPNTR(2): pointer to the current result vector Y. -c IPNTR(3): pointer to the vector B * X when used in the +c IPNTR(3): pointer to the vector B * X when used in the c shift-and-invert mode. X is the current operand. c ------------------------------------------------------------- -c +c c WORKD Complex*16 work array of length 3*N. (WORKSPACE) c Distributed array to be used in the basic Arnoldi iteration c for reverse communication. The user should not use WORKD @@ -101,7 +101,7 @@ c Error flag on output. c = 0: Normal return. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. +c All possible eigenvalues of OP has been found. c NP returns the number of converged Ritz values. c = 2: No shifts could be applied. c = -8: Error return from LAPACK eigenvalue calculation; @@ -117,32 +117,32 @@ c\BeginLib c c\Local variables: -c xxxxxx Complex*16 +c xxxxxx Complex*16 c c\References: c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c c\Routines called: -c zgetv0 ARPACK initial vector generation routine. +c zgetv0 ARPACK initial vector generation routine. c znaitr ARPACK Arnoldi factorization routine. c znapps ARPACK application of implicit shifts routine. -c zneigh ARPACK compute Ritz values and error bounds routine. +c zneigh ARPACK compute Ritz values and error bounds routine. c zngets ARPACK reorder Ritz values and error bounds routine. c zsortc ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c zmout ARPACK utility routine that prints matrices c zvout ARPACK utility routine that prints vectors. c dvout ARPACK utility routine that prints vectors. c dlamch LAPACK routine that determines machine constants. c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. c zcopy Level 1 BLAS that copies one vector to another . -c zdotc Level 1 BLAS that computes the scalar product of two vectors. +c zdotc Level 1 BLAS that computes the scalar product of two vectors. c zswap Level 1 BLAS that swaps two vectors. c dznrm2 Level 1 BLAS that computes the norm of a vector. c @@ -151,10 +151,10 @@ c Richard Lehoucq CRPC / Rice Universitya c Chao Yang Houston, Texas c Dept. of Computational & -c Applied Mathematics -c Rice University -c Houston, Texas -c +c Applied Mathematics +c Rice University +c Houston, Texas +c c\SCCS Information: @(#) c FILE: naup2.F SID: 2.6 DATE OF SID: 06/01/00 RELEASE: 2 c @@ -165,17 +165,17 @@ c c----------------------------------------------------------------------- c - subroutine znaup2 - & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, + subroutine znaup2 + & ( ido, bmat, n, which, nev, np, tol, resid, mode, iupd, + & ishift, mxiter, v, ldv, h, ldh, ritz, bounds, & q, ldq, workl, ipntr, workd, rwork, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -184,7 +184,7 @@ subroutine znaup2 character bmat*1, which*2 integer ido, info, ishift, iupd, mode, ldh, ldq, ldv, mxiter, & n, nev, np - Double precision + Double precision & tol c c %-----------------% @@ -192,20 +192,20 @@ subroutine znaup2 c %-----------------% c integer ipntr(13) - Complex*16 - & bounds(nev+np), h(ldh,nev+np), q(ldq,nev+np), - & resid(n), ritz(nev+np), v(ldv,nev+np), + Complex*16 + & bounds(nev+np), h(ldh,nev+np), q(ldq,nev+np), + & resid(n), ritz(nev+np), v(ldv,nev+np), & workd(3*n), workl( (nev+np)*(nev+np+3) ) - Double precision + Double precision & rwork(nev+np) c c %------------% c | Parameters | c %------------% c - Complex*16 + Complex*16 & one, zero - Double precision + Double precision & rzero parameter (one = (1.0D+0, 0.0D+0) , zero = (0.0D+0, 0.0D+0) , & rzero = 0.0D+0 ) @@ -215,16 +215,16 @@ subroutine znaup2 c %---------------% c logical cnorm , getv0, initv , update, ushift - integer ierr , iter , kplusp, msglvl, nconv, + integer ierr , iter , kplusp, msglvl, nconv, & nevbef, nev0 , np0 , nptemp, i , - & j - Complex*16 + & j + Complex*16 & cmpnorm - Double precision + Double precision & rnorm , eps23, rtemp character wprime*2 c - save cnorm, getv0, initv , update, ushift, + save cnorm, getv0, initv , update, ushift, & rnorm, iter , kplusp, msglvl, nconv , & nevbef, nev0 , np0 , eps23 c @@ -240,34 +240,34 @@ subroutine znaup2 c %----------------------% c external zcopy , zgetv0 , znaitr , zneigh , zngets , znapps , - & zsortc , zswap , zmout , zvout , ivout, second + & zsortc , zswap , zmout , zvout , ivout, arscnd c c %--------------------% c | External functions | c %--------------------% c - Complex*16 - & zdotc - Double precision - & dznrm2 , dlamch , dlapy2 - external zdotc , dznrm2 , dlamch , dlapy2 + Complex*16 + & zzdotc + Double precision + & dznrm2 , dlamch , dlapy2 + external zzdotc , dznrm2 , dlamch , dlapy2 c c %---------------------% c | Intrinsic Functions | c %---------------------% c - intrinsic dimag , dble , min, max + intrinsic aimag , dble , min, max c c %-----------------------% c | Executable Statements | c %-----------------------% c if (ido .eq. 0) then -c - call second (t0) -c +c + call arscnd (t0) +c msglvl = mcaup2 -c +c nev0 = nev np0 = np c @@ -283,7 +283,7 @@ subroutine znaup2 kplusp = nev + np nconv = 0 iter = 0 -c +c c %---------------------------------% c | Get machine dependent constant. | c %---------------------------------% @@ -313,7 +313,7 @@ subroutine znaup2 initv = .false. end if end if -c +c c %---------------------------------------------% c | Get a possibly random starting vector and | c | force it into the range of the operator OP. | @@ -330,7 +330,7 @@ subroutine znaup2 if (rnorm .eq. rzero) then c c %-----------------------------------------% -c | The initial vector is zero. Error exit. | +c | The initial vector is zero. Error exit. | c %-----------------------------------------% c info = -9 @@ -339,7 +339,7 @@ subroutine znaup2 getv0 = .false. ido = 0 end if -c +c c %-----------------------------------% c | Back from reverse communication : | c | continue with update step | @@ -359,12 +359,12 @@ subroutine znaup2 c %-------------------------------------% c if (cnorm) go to 100 -c +c c %----------------------------------------------------------% c | Compute the first NEV steps of the Arnoldi factorization | c %----------------------------------------------------------% c - call znaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, + call znaitr (ido, bmat, n, 0, nev, mode, resid, rnorm, v, ldv, & h, ldh, ipntr, workd, info) c if (ido .ne. 99) go to 9000 @@ -375,7 +375,7 @@ subroutine znaup2 info = -9999 go to 1200 end if -c +c c %--------------------------------------------------------------% c | | c | M A I N ARNOLDI I T E R A T I O N L O O P | @@ -383,16 +383,16 @@ subroutine znaup2 c | factorization in place. | c | | c %--------------------------------------------------------------% -c +c 1000 continue c iter = iter + 1 c if (msglvl .gt. 0) then - call ivout (logfil, 1, iter, ndigit, + call ivout (logfil, 1, [iter], ndigit, & '_naup2: **** Start of major iteration number ****') end if -c +c c %-----------------------------------------------------------% c | Compute NP additional steps of the Arnoldi factorization. | c | Adjust NP since NEV might have been updated by last call | @@ -402,9 +402,9 @@ subroutine znaup2 np = kplusp - nev c if (msglvl .gt. 1) then - call ivout (logfil, 1, nev, ndigit, + call ivout (logfil, 1, [nev], ndigit, & '_naup2: The length of the current Arnoldi factorization') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naup2: Extend the Arnoldi factorization by') end if c @@ -430,10 +430,10 @@ subroutine znaup2 update = .false. c if (msglvl .gt. 1) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_naup2: Corresponding B-norm of the residual') end if -c +c c %--------------------------------------------------------% c | Compute the eigenvalues and corresponding error bounds | c | of the current upper Hessenberg matrix. | @@ -452,7 +452,7 @@ subroutine znaup2 c | to be used in the convergence test. | c | The wanted part of the spectrum and corresponding | c | error bounds are in the last NEV loc. of RITZ, | -c | and BOUNDS respectively. | +c | and BOUNDS respectively. | c %---------------------------------------------------% c nev = nev0 @@ -475,7 +475,7 @@ subroutine znaup2 c %---------------------------------------------------% c call zngets (ishift, which, nev, np, ritz, bounds) -c +c c %------------------------------------------------------------% c | Convergence test: currently we use the following criteria. | c | The relative accuracy of a Ritz value is considered | @@ -489,22 +489,22 @@ subroutine znaup2 c do 25 i = 1, nev rtemp = max( eps23, dlapy2 ( dble (ritz(np+i)), - & dimag (ritz(np+i)) ) ) - if ( dlapy2 (dble (bounds(np+i)),dimag (bounds(np+i))) + & aimag (ritz(np+i)) ) ) + if ( dlapy2 (dble (bounds(np+i)),aimag (bounds(np+i))) & .le. tol*rtemp ) then nconv = nconv + 1 end if 25 continue -c +c if (msglvl .gt. 2) then kp(1) = nev kp(2) = np kp(3) = nconv - call ivout (logfil, 3, kp, ndigit, + call ivout (logfil, 3, kp, ndigit, & '_naup2: NEV, NP, NCONV are') call zvout (logfil, kplusp, ritz, ndigit, & '_naup2: The eigenvalues of H') - call zvout (logfil, kplusp, bounds, ndigit, + call zvout (logfil, kplusp, bounds, ndigit, & '_naup2: Ritz estimates of the current NCV Ritz values') end if c @@ -525,8 +525,8 @@ subroutine znaup2 nev = nev + 1 end if 30 continue -c - if ( (nconv .ge. nev0) .or. +c + if ( (nconv .ge. nev0) .or. & (iter .gt. mxiter) .or. & (np .eq. 0) ) then c @@ -537,7 +537,7 @@ subroutine znaup2 & ndigit, & '_naup2: Ritz estimates computed by _neigh:') end if -c +c c %------------------------------------------------% c | Prepare to exit. Put the converged Ritz values | c | and corresponding bounds in RITZ(1:NCONV) and | @@ -550,7 +550,7 @@ subroutine znaup2 c | rnorm to zneupd if needed | c %------------------------------------------% - h(3,1) = dcmplx (rnorm,rzero) + h(3,1) = cmplx (rnorm,rzero,Kind=Kind(0d0)) c c %----------------------------------------------% c | Sort Ritz values so that converged Ritz | @@ -573,9 +573,9 @@ subroutine znaup2 c | by 1 / max(eps23, magnitude of the Ritz value). | c %--------------------------------------------------% c - do 35 j = 1, nev0 + do 35 j = 1, nev0 rtemp = max( eps23, dlapy2 ( dble (ritz(j)), - & dimag (ritz(j)) ) ) + & aimag (ritz(j)) ) ) bounds(j) = bounds(j)/rtemp 35 continue c @@ -596,7 +596,7 @@ subroutine znaup2 c do 40 j = 1, nev0 rtemp = max( eps23, dlapy2 ( dble (ritz(j)), - & dimag (ritz(j)) ) ) + & aimag (ritz(j)) ) ) bounds(j) = bounds(j)*rtemp 40 continue c @@ -616,13 +616,13 @@ subroutine znaup2 end if c c %------------------------------------% -c | Max iterations have been exceeded. | +c | Max iterations have been exceeded. | c %------------------------------------% c if (iter .gt. mxiter .and. nconv .lt. nev0) info = 1 c c %---------------------% -c | No shifts to apply. | +c | No shifts to apply. | c %---------------------% c if (np .eq. 0 .and. nconv .lt. nev0) info = 2 @@ -631,7 +631,7 @@ subroutine znaup2 go to 1100 c else if ( (nconv .lt. nev0) .and. (ishift .eq. 1) ) then -c +c c %-------------------------------------------------% c | Do not have all the requested eigenvalues yet. | c | To prevent possible stagnation, adjust the size | @@ -646,24 +646,24 @@ subroutine znaup2 nev = 2 end if np = kplusp - nev -c +c c %---------------------------------------% c | If the size of NEV was just increased | c | resort the eigenvalues. | c %---------------------------------------% -c - if (nevbef .lt. nev) +c + if (nevbef .lt. nev) & call zngets (ishift, which, nev, np, ritz, bounds) c - end if -c + end if +c if (msglvl .gt. 0) then - call ivout (logfil, 1, nconv, ndigit, + call ivout (logfil, 1, [nconv], ndigit, & '_naup2: no. of "converged" Ritz values at this iter.') if (msglvl .gt. 1) then kp(1) = nev kp(2) = np - call ivout (logfil, 2, kp, ndigit, + call ivout (logfil, 2, kp, ndigit, & '_naup2: NEV and NP are') call zvout (logfil, nev, ritz(np+1), ndigit, & '_naup2: "wanted" Ritz values ') @@ -687,7 +687,7 @@ subroutine znaup2 ushift = .false. c if ( ishift .ne. 1 ) then -c +c c %----------------------------------% c | Move the NP shifts from WORKL to | c | RITZ, to free up WORKL | @@ -697,12 +697,12 @@ subroutine znaup2 call zcopy (np, workl, 1, ritz, 1) end if c - if (msglvl .gt. 2) then - call ivout (logfil, 1, np, ndigit, + if (msglvl .gt. 2) then + call ivout (logfil, 1, [np], ndigit, & '_naup2: The number of shifts to apply ') call zvout (logfil, np, ritz, ndigit, & '_naup2: values of the shifts') - if ( ishift .eq. 1 ) + if ( ishift .eq. 1 ) & call zvout (logfil, np, bounds, ndigit, & '_naup2: Ritz estimates of the shifts') end if @@ -714,7 +714,7 @@ subroutine znaup2 c | The first 2*N locations of WORKD are used as workspace. | c %---------------------------------------------------------% c - call znapps (n, nev, np, ritz, v, ldv, + call znapps (n, nev, np, ritz, v, ldv, & h, ldh, resid, q, ldq, workl, workd) c c %---------------------------------------------% @@ -724,50 +724,50 @@ subroutine znaup2 c %---------------------------------------------% c cnorm = .true. - call second (t2) + call arscnd (t2) if (bmat .eq. 'G') then nbx = nbx + 1 call zcopy (n, resid, 1, workd(n+1), 1) ipntr(1) = n + 1 ipntr(2) = 1 ido = 2 -c +c c %----------------------------------% c | Exit in order to compute B*RESID | c %----------------------------------% -c +c go to 9000 else if (bmat .eq. 'I') then call zcopy (n, resid, 1, workd, 1) end if -c +c 100 continue -c +c c %----------------------------------% c | Back from reverse communication; | c | WORKD(1:N) := B*RESID | c %----------------------------------% c if (bmat .eq. 'G') then - call second (t3) + call arscnd (t3) tmvbx = tmvbx + (t3 - t2) end if -c - if (bmat .eq. 'G') then - cmpnorm = zdotc (n, resid, 1, workd, 1) - rnorm = sqrt(dlapy2 (dble (cmpnorm),dimag (cmpnorm))) +c + if (bmat .eq. 'G') then + cmpnorm = zzdotc (n, resid, 1, workd, 1) + rnorm = sqrt(dlapy2 (dble (cmpnorm),aimag (cmpnorm))) else if (bmat .eq. 'I') then rnorm = dznrm2 (n, resid, 1) end if cnorm = .false. c if (msglvl .gt. 2) then - call dvout (logfil, 1, rnorm, ndigit, + call dvout (logfil, 1, [rnorm], ndigit, & '_naup2: B-norm of residual for compressed factorization') call zmout (logfil, nev, nev, h, ldh, ndigit, & '_naup2: Compressed upper Hessenberg matrix H') end if -c +c go to 1000 c c %---------------------------------------------------------------% @@ -780,7 +780,7 @@ subroutine znaup2 c mxiter = iter nev = nconv -c +c 1200 continue ido = 99 c @@ -788,9 +788,9 @@ subroutine znaup2 c | Error Exit | c %------------% c - call second (t1) + call arscnd (t1) tcaup2 = t1 - t0 -c +c 9000 continue c c %---------------% diff --git a/Toolbox/arpack-src/znaupd.f b/Toolbox/arpack-src/znaupd.f index 44c743ce7..c7d58aaab 100644 --- a/Toolbox/arpack-src/znaupd.f +++ b/Toolbox/arpack-src/znaupd.f @@ -2,19 +2,19 @@ c c\Name: znaupd c -c\Description: +c\Description: c Reverse communication interface for the Implicitly Restarted Arnoldi -c iteration. This is intended to be used to find a few eigenpairs of a -c complex linear operator OP with respect to a semi-inner product defined -c by a hermitian positive semi-definite real matrix B. B may be the identity -c matrix. NOTE: if both OP and B are real, then dsaupd or dnaupd should +c iteration. This is intended to be used to find a few eigenpairs of a +c complex linear operator OP with respect to a semi-inner product defined +c by a hermitian positive semi-definite real matrix B. B may be the identity +c matrix. NOTE: if both OP and B are real, then dsaupd or dnaupd should c be used. c c c The computed approximate eigenvalues are called Ritz values and c the corresponding approximate eigenvectors are called Ritz vectors. c -c znaupd is usually called iteratively to solve one of the +c znaupd is usually called iteratively to solve one of the c following problems: c c Mode 1: A*x = lambda*x. @@ -25,10 +25,10 @@ c ===> (If M can be factored see remark 3 below) c c Mode 3: A*x = lambda*M*x, M hermitian semi-definite -c ===> OP = inv[A - sigma*M]*M and B = M. -c ===> shift-and-invert mode +c ===> OP = inv[A - sigma*M]*M and B = M. +c ===> shift-and-invert mode c If OP*x = amu*x, then lambda = sigma + 1/amu. -c +c c c NOTE: The action of w <- inv[A - sigma*M]*v or w <- inv[M]*v c should be accomplished either by a direct method @@ -49,12 +49,12 @@ c c\Arguments c IDO Integer. (INPUT/OUTPUT) -c Reverse communication flag. IDO must be zero on the first -c call to znaupd. IDO will be set internally to +c Reverse communication flag. IDO must be zero on the first +c call to znaupd . IDO will be set internally to c indicate the type of operation to be performed. Control is c then given back to the calling routine which has the c responsibility to carry out the requested operation and call -c znaupd with the result. The operand is given in +c znaupd with the result. The operand is given in c WORKD(IPNTR(1)), the result must be put in WORKD(IPNTR(2)). c ------------------------------------------------------------- c IDO = 0: first call to the reverse communication interface @@ -72,14 +72,14 @@ c IDO = 2: compute Y = M * X where c IPNTR(1) is the pointer into WORKD for X, c IPNTR(2) is the pointer into WORKD for Y. -c IDO = 3: compute and return the shifts in the first +c IDO = 3: compute and return the shifts in the first c NP locations of WORKL. c IDO = 99: done c ------------------------------------------------------------- -c After the initialization phase, when the routine is used in -c the "shift-and-invert" mode, the vector M * X is already +c After the initialization phase, when the routine is used in +c the "shift-and-invert" mode, the vector M * X is already c available and does not need to be recomputed in forming OP*X. -c +c c BMAT Character*1. (INPUT) c BMAT specifies the type of the matrix B that defines the c semi-inner product for the operator OP. @@ -100,15 +100,15 @@ c NEV Integer. (INPUT) c Number of eigenvalues of OP to be computed. 0 < NEV < N-1. c -c TOL Double precision scalar. (INPUT) -c Stopping criteria: the relative accuracy of the Ritz value +c TOL Double precision scalar. (INPUT) +c Stopping criteria: the relative accuracy of the Ritz value c is considered acceptable if BOUNDS(I) .LE. TOL*ABS(RITZ(I)) c where ABS(RITZ(I)) is the magnitude when RITZ(I) is complex. -c DEFAULT = dlamch('EPS') (machine precision as computed -c by the LAPACK auxiliary subroutine dlamch). +c DEFAULT = dlamch ('EPS') (machine precision as computed +c by the LAPACK auxiliary subroutine dlamch ). c -c RESID Complex*16 array of length N. (INPUT/OUTPUT) -c On INPUT: +c RESID Complex*16 array of length N. (INPUT/OUTPUT) +c On INPUT: c If INFO .EQ. 0, a random initial residual vector is used. c If INFO .NE. 0, RESID contains the initial residual vector, c possibly from a previous run. @@ -118,15 +118,15 @@ c NCV Integer. (INPUT) c Number of columns of the matrix V. NCV must satisfy the two c inequalities 1 <= NCV-NEV and NCV <= N. -c This will indicate how many Arnoldi vectors are generated -c at each iteration. After the startup phase in which NEV -c Arnoldi vectors are generated, the algorithm generates -c approximately NCV-NEV Arnoldi vectors at each subsequent update -c iteration. Most of the cost in generating each Arnoldi vector is +c This will indicate how many Arnoldi vectors are generated +c at each iteration. After the startup phase in which NEV +c Arnoldi vectors are generated, the algorithm generates +c approximately NCV-NEV Arnoldi vectors at each subsequent update +c iteration. Most of the cost in generating each Arnoldi vector is c in the matrix-vector operation OP*x. (See remark 4 below.) c -c V Complex*16 array N by NCV. (OUTPUT) -c Contains the final set of Arnoldi basis vectors. +c V Complex*16 array N by NCV. (OUTPUT) +c Contains the final set of Arnoldi basis vectors. c c LDV Integer. (INPUT) c Leading dimension of V exactly as declared in the calling program. @@ -137,23 +137,23 @@ c the components of the unwanted eigenvector. c ------------------------------------------------------------- c ISHIFT = 0: the shifts are to be provided by the user via -c reverse communication. The NCV eigenvalues of +c reverse communication. The NCV eigenvalues of c the Hessenberg matrix H are returned in the part c of WORKL array corresponding to RITZ. c ISHIFT = 1: exact shifts with respect to the current -c Hessenberg matrix H. This is equivalent to -c restarting the iteration from the beginning +c Hessenberg matrix H. This is equivalent to +c restarting the iteration from the beginning c after updating the starting vector with a linear -c combination of Ritz vectors associated with the +c combination of Ritz vectors associated with the c "wanted" eigenvalues. c ISHIFT = 2: other choice of internal shift to be defined. c ------------------------------------------------------------- c -c IPARAM(2) = No longer referenced +c IPARAM(2) = No longer referenced c c IPARAM(3) = MXITER -c On INPUT: maximum number of Arnoldi update iterations allowed. -c On OUTPUT: actual number of Arnoldi update iterations taken. +c On INPUT: maximum number of Arnoldi update iterations allowed. +c On OUTPUT: actual number of Arnoldi update iterations taken. c c IPARAM(4) = NB: blocksize to be used in the recurrence. c The code currently works only for NB = 1. @@ -163,11 +163,11 @@ c the convergence criterion. c c IPARAM(6) = IUPD -c No longer referenced. Implicit restarting is ALWAYS used. +c No longer referenced. Implicit restarting is ALWAYS used. c c IPARAM(7) = MODE c On INPUT determines what type of eigenproblem is being solved. -c Must be 1,2,3; See under \Description of znaupd for the +c Must be 1,2,3; See under \Description of znaupd for the c four modes available. c c IPARAM(8) = NP @@ -186,7 +186,7 @@ c ------------------------------------------------------------- c IPNTR(1): pointer to the current operand vector X in WORKD. c IPNTR(2): pointer to the current result vector Y in WORKD. -c IPNTR(3): pointer to the vector B * X in WORKD when used in +c IPNTR(3): pointer to the vector B * X in WORKD when used in c the shift-and-invert mode. c IPNTR(4): pointer to the next available location in WORKL c that is untouched by the program. @@ -197,9 +197,9 @@ c IPNTR(8): pointer to the error BOUNDS array in WORKL. c IPNTR(14): pointer to the NP shifts in WORKL. See Remark 5 below. c -c Note: IPNTR(9:13) is only referenced by zneupd. See Remark 2 below. +c Note: IPNTR(9:13) is only referenced by zneupd . See Remark 2 below. c -c IPNTR(9): pointer to the NCV RITZ values of the +c IPNTR(9): pointer to the NCV RITZ values of the c original system. c IPNTR(10): Not Used c IPNTR(11): pointer to the NCV corresponding error bounds. @@ -207,24 +207,24 @@ c Schur matrix for H. c IPNTR(13): pointer to the NCV by NCV matrix of eigenvectors c of the upper Hessenberg matrix H. Only referenced by -c zneupd if RVEC = .TRUE. See Remark 2 below. +c zneupd if RVEC = .TRUE. See Remark 2 below. c c ------------------------------------------------------------- -c -c WORKD Complex*16 work array of length 3*N. (REVERSE COMMUNICATION) +c +c WORKD Complex*16 work array of length 3*N. (REVERSE COMMUNICATION) c Distributed array to be used in the basic Arnoldi iteration -c for reverse communication. The user should not use WORKD +c for reverse communication. The user should not use WORKD c as temporary workspace during the iteration !!!!!!!!!! -c See Data Distribution Note below. +c See Data Distribution Note below. c -c WORKL Complex*16 work array of length LWORKL. (OUTPUT/WORKSPACE) +c WORKL Complex*16 work array of length LWORKL. (OUTPUT/WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. See Data Distribution Note below. c c LWORKL Integer. (INPUT) c LWORKL must be at least 3*NCV**2 + 5*NCV. c -c RWORK Double precision work array of length NCV (WORKSPACE) +c RWORK Double precision work array of length NCV (WORKSPACE) c Private (replicated) array on each PE or array allocated on c the front end. c @@ -236,18 +236,18 @@ c Error flag on output. c = 0: Normal exit. c = 1: Maximum number of iterations taken. -c All possible eigenvalues of OP has been found. IPARAM(5) +c All possible eigenvalues of OP has been found. IPARAM(5) c returns the number of wanted converged Ritz values. c = 2: No longer an informational error. Deprecated starting c with release 2 of ARPACK. -c = 3: No shifts could be applied during a cycle of the -c Implicitly restarted Arnoldi iteration. One possibility -c is to increase the size of NCV relative to NEV. +c = 3: No shifts could be applied during a cycle of the +c Implicitly restarted Arnoldi iteration. One possibility +c is to increase the size of NCV relative to NEV. c See remark 4 below. c = -1: N must be positive. c = -2: NEV must be positive. -c = -3: NCV-NEV >= 1 and less than or equal to N. -c = -4: The maximum number of Arnoldi update iteration +c = -3: NCV-NEV >= 2 and less than or equal to N. +c = -4: The maximum number of Arnoldi update iteration c must be greater than zero. c = -5: WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI' c = -6: BMAT must be one of 'I' or 'G'. @@ -268,16 +268,16 @@ c selection of WHICH should be made with this in mind when using c Mode = 3. When operating in Mode = 3 setting WHICH = 'LM' will c compute the NEV eigenvalues of the original problem that are -c closest to the shift SIGMA . After convergence, approximate eigenvalues -c of the original problem may be obtained with the ARPACK subroutine zneupd. +c closest to the shift SIGMA . After convergence, approximate eigenvalues +c of the original problem may be obtained with the ARPACK subroutine zneupd . c -c 2. If a basis for the invariant subspace corresponding to the converged Ritz -c values is needed, the user must call zneupd immediately following -c completion of znaupd. This is new starting with release 2 of ARPACK. +c 2. If a basis for the invariant subspace corresponding to the converged Ritz +c values is needed, the user must call zneupd immediately following +c completion of znaupd . This is new starting with release 2 of ARPACK. c c 3. If M can be factored into a Cholesky factorization M = LL` c then Mode = 2 should not be selected. Instead one should use -c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular +c Mode = 1 with OP = inv(L)*A*inv(L`). Appropriate triangular c linear systems should be solved with L and L` rather c than computing inverses. After convergence, an approximate c eigenvector z of the original problem is recovered by solving @@ -287,11 +287,11 @@ c of NCV relative to NEV. The only formal requirement is that NCV > NEV + 1. c However, it is recommended that NCV .ge. 2*NEV. If many problems of c the same type are to be solved, one should experiment with increasing -c NCV while keeping NEV fixed for a given test problem. This will +c NCV while keeping NEV fixed for a given test problem. This will c usually decrease the required number of OP*x operations but it c also increases the work and storage required to maintain the orthogonal c basis vectors. The optimal "cross-over" with respect to CPU time -c is problem dependent and must be determined empirically. +c is problem dependent and must be determined empirically. c See Chapter 8 of Reference 2 for further information. c c 5. When IPARAM(1) = 0, and IDO = 3, the user needs to provide the @@ -305,11 +305,11 @@ c c----------------------------------------------------------------------- c -c\Data Distribution Note: +c\Data Distribution Note: c c Fortran-D syntax: c ================ -c Complex*16 resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) +c Complex*16 resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) c decompose d1(n), d2(n,ncv) c align resid(i) with d1(i) c align v(i,j) with d2(i,j) @@ -321,13 +321,13 @@ c c Cray MPP syntax: c =============== -c Complex*16 resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) +c Complex*16 resid(n), v(ldv,ncv), workd(n,3), workl(lworkl) c shared resid(block), v(block,:), workd(block,:) c replicated workl(lworkl) -c +c c CM2/CM5 syntax: c ============== -c +c c----------------------------------------------------------------------- c c include 'ex-nonsym.doc' @@ -343,32 +343,32 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B.N. Parlett & Y. Saad, "_Complex_ Shift and Invert Strategies for -c Double precision Matrices", Linear Algebra and its Applications, vol 88/89, +c _Real_ Matrices", Linear Algebra and its Applications, vol 88/89, c pp 575-595, (1987). c c\Routines called: -c znaup2 ARPACK routine that implements the Implicitly Restarted +c znaup2 ARPACK routine that implements the Implicitly Restarted c Arnoldi Iteration. -c zstatn ARPACK routine that initializes the timing variables. +c zstatn ARPACK routine that initializes the timing variables. c ivout ARPACK utility routine that prints integers. -c zvout ARPACK utility routine that prints vectors. -c second ARPACK utility routine for timing. -c dlamch LAPACK routine that determines machine constants. +c zvout ARPACK utility routine that prints vectors. +c arscnd ARPACK utility routine for timing. +c dlamch LAPACK routine that determines machine constants. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas -c +c Applied Mathematics +c Rice University +c Houston, Texas +c c\SCCS Information: @(#) -c FILE: naupd.F SID: 2.9 DATE OF SID: 07/21/02 RELEASE: 2 +c FILE: naupd.F SID: 2.8 DATE OF SID: 04/10/01 RELEASE: 2 c c\Remarks c @@ -377,15 +377,15 @@ c----------------------------------------------------------------------- c subroutine znaupd - & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, + & ( ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, & ipntr, workd, workl, lworkl, rwork, info ) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -393,7 +393,7 @@ subroutine znaupd c character bmat*1, which*2 integer ido, info, ldv, lworkl, n, ncv, nev - Double precision + Double precision & tol c c %-----------------% @@ -403,7 +403,7 @@ subroutine znaupd integer iparam(11), ipntr(14) Complex*16 & resid(n), v(ldv,ncv), workd(3*n), workl(lworkl) - Double precision + Double precision & rwork(ncv) c c %------------% @@ -412,13 +412,13 @@ subroutine znaupd c Complex*16 & one, zero - parameter (one = (1.0D+0, 0.0D+0), zero = (0.0D+0, 0.0D+0)) + parameter (one = (1.0D+0, 0.0D+0) , zero = (0.0D+0, 0.0D+0) ) c c %---------------% c | Local Scalars | c %---------------% c - integer bounds, ierr, ih, iq, ishift, iupd, iw, + integer bounds, ierr, ih, iq, ishift, iupd, iw, & ldh, ldq, levec, mode, msglvl, mxiter, nb, & nev0, next, np, ritz, j save bounds, ih, iq, ishift, iupd, iw, @@ -429,29 +429,29 @@ subroutine znaupd c | External Subroutines | c %----------------------% c - external znaup2, zvout, ivout, second, zstatn + external znaup2 , zvout , ivout, arscnd, zstatn c c %--------------------% c | External Functions | c %--------------------% c - Double precision + Double precision & dlamch external dlamch c c %-----------------------% c | Executable Statements | c %-----------------------% -c +c if (ido .eq. 0) then -c +c c %-------------------------------% c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% c call zstatn - call second (t0) + call arscnd (t0) msglvl = mcaupd c c %----------------% @@ -496,7 +496,7 @@ subroutine znaupd else if (mode .eq. 1 .and. bmat .eq. 'G') then ierr = -11 end if -c +c c %------------% c | Error Exit | c %------------% @@ -506,14 +506,14 @@ subroutine znaupd ido = 99 go to 9000 end if -c +c c %------------------------% c | Set default parameters | c %------------------------% c if (nb .le. 0) nb = 1 - if (tol .le. 0.0D+0 ) tol = dlamch('EpsMach') - if (ishift .ne. 0 .and. + if (tol .le. 0.0D+0 ) tol = dlamch ('EpsMach') + if (ishift .ne. 0 .and. & ishift .ne. 1 .and. & ishift .ne. 2) ishift = 1 c @@ -525,8 +525,8 @@ subroutine znaupd c %----------------------------------------------% c np = ncv - nev - nev0 = nev -c + nev0 = nev +c c %-----------------------------% c | Zero out internal workspace | c %-----------------------------% @@ -534,7 +534,7 @@ subroutine znaupd do 10 j = 1, 3*ncv**2 + 5*ncv workl(j) = zero 10 continue -c +c c %-------------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, BOUNDS, Q | c | etc... and the remaining workspace. | @@ -545,8 +545,8 @@ subroutine znaupd c | workl(ncv*ncv+ncv+1:ncv*ncv+2*ncv) := error bounds | c | workl(ncv*ncv+2*ncv+1:2*ncv*ncv+2*ncv) := rotation matrix Q | c | workl(2*ncv*ncv+2*ncv+1:3*ncv*ncv+5*ncv) := workspace | -c | The final workspace is needed by subroutine zneigh called | -c | by znaup2. Subroutine zneigh calls LAPACK routines for | +c | The final workspace is needed by subroutine zneigh called | +c | by znaup2 . Subroutine zneigh calls LAPACK routines for | c | calculating eigenvalues and the last row of the eigenvector | c | matrix. | c %-------------------------------------------------------------% @@ -572,12 +572,12 @@ subroutine znaupd c | Carry out the Implicitly restarted Arnoldi Iteration. | c %-------------------------------------------------------% c - call znaup2 + call znaup2 & ( ido, bmat, n, which, nev0, np, tol, resid, mode, iupd, - & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritz), - & workl(bounds), workl(iq), ldq, workl(iw), + & ishift, mxiter, v, ldv, workl(ih), ldh, workl(ritz), + & workl(bounds), workl(iq), ldq, workl(iw), & ipntr, workd, rwork, info ) -c +c c %--------------------------------------------------% c | ido .ne. 99 implies use of reverse communication | c | to compute operations involving OP. | @@ -585,7 +585,7 @@ subroutine znaupd c if (ido .eq. 3) iparam(8) = np if (ido .ne. 99) go to 9000 -c +c iparam(3) = mxiter iparam(5) = np iparam(9) = nopx @@ -594,24 +594,24 @@ subroutine znaupd c c %------------------------------------% c | Exit if there was an informational | -c | error within znaup2. | +c | error within znaup2 . | c %------------------------------------% c if (info .lt. 0) go to 9000 if (info .eq. 2) info = 3 c if (msglvl .gt. 0) then - call ivout (logfil, 1, mxiter, ndigit, + call ivout (logfil, 1, [mxiter], ndigit, & '_naupd: Number of update iterations taken') - call ivout (logfil, 1, np, ndigit, + call ivout (logfil, 1, [np], ndigit, & '_naupd: Number of wanted "converged" Ritz values') - call zvout (logfil, np, workl(ritz), ndigit, + call zvout (logfil, np, workl(ritz), ndigit, & '_naupd: The final Ritz values') - call zvout (logfil, np, workl(bounds), ndigit, + call zvout (logfil, np, workl(bounds), ndigit, & '_naupd: Associated Ritz estimates') end if c - call second (t1) + call arscnd (t1) tcaupd = t1 - t0 c if (msglvl .gt. 0) then @@ -627,8 +627,8 @@ subroutine znaupd 1000 format (//, & 5x, '=============================================',/ & 5x, '= Complex implicit Arnoldi update code =',/ - & 5x, '= Version Number: ', ' 2.3', 21x, ' =',/ - & 5x, '= Version Date: ', ' 07/31/96', 16x, ' =',/ + & 5x, '= Version Number: ', ' 2.3' , 21x, ' =',/ + & 5x, '= Version Date: ', ' 07/31/96' , 16x, ' =',/ & 5x, '=============================================',/ & 5x, '= Summary of timing statistics =',/ & 5x, '=============================================',//) @@ -658,7 +658,7 @@ subroutine znaupd return c c %---------------% -c | End of znaupd | +c | End of znaupd | c %---------------% c end diff --git a/Toolbox/arpack-src/zneigh.f b/Toolbox/arpack-src/zneigh.f index c9ace20a9..db1bc2298 100644 --- a/Toolbox/arpack-src/zneigh.f +++ b/Toolbox/arpack-src/zneigh.f @@ -12,7 +12,7 @@ c c\Arguments c RNORM Double precision scalar. (INPUT) -c Residual norm corresponding to the current upper Hessenberg +c Residual norm corresponding to the current upper Hessenberg c matrix H. c c N Integer. (INPUT) @@ -30,8 +30,8 @@ c c BOUNDS Complex*16 array of length N. (OUTPUT) c On output, BOUNDS contains the Ritz estimates associated with -c the eigenvalues held in RITZ. This is equal to RNORM -c times the last components of the eigenvectors corresponding +c the eigenvalues held in RITZ. This is equal to RNORM +c times the last components of the eigenvectors corresponding c to the eigenvalues in RITZ. c c Q Complex*16 N by N array. (WORKSPACE) @@ -48,7 +48,7 @@ c c RWORK Double precision work array of length N (WORKSPACE) c Private (replicated) array on each PE or array allocated on -c the front end. +c the front end. c c IERR Integer. (OUTPUT) c Error exit flag from zlahqr or ztrevc. @@ -64,7 +64,7 @@ c c\Routines called: c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c zmout ARPACK utility routine that prints matrices c zvout ARPACK utility routine that prints vectors. c dvout ARPACK utility routine that prints vectors. @@ -74,18 +74,18 @@ c zlaset LAPACK matrix initialization routine. c ztrevc LAPACK routine to compute the eigenvectors of a matrix c in upper triangular form -c zcopy Level 1 BLAS that copies one vector to another. +c zcopy Level 1 BLAS that copies one vector to another. c zdscal Level 1 BLAS that scales a complex vector by a real number. c dznrm2 Level 1 BLAS that computes the norm of a vector. -c +c c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: neigh.F SID: 2.2 DATE OF SID: 4/20/96 RELEASE: 2 @@ -97,52 +97,52 @@ c c----------------------------------------------------------------------- c - subroutine zneigh (rnorm, n, h, ldh, ritz, bounds, + subroutine zneigh (rnorm, n, h, ldh, ritz, bounds, & q, ldq, workl, rwork, ierr) c c %----------------------------------------------------% c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | c %------------------% c integer ierr, n, ldh, ldq - Double precision + Double precision & rnorm c c %-----------------% c | Array Arguments | c %-----------------% c - Complex*16 + Complex*16 & bounds(n), h(ldh,n), q(ldq,n), ritz(n), - & workl(n*(n+3)) - Double precision + & workl(n*(n+3)) + Double precision & rwork(n) -c +c c %------------% c | Parameters | c %------------% c - Complex*16 + Complex*16 & one, zero Double precision & rone parameter (one = (1.0D+0, 0.0D+0), zero = (0.0D+0, 0.0D+0), & rone = 1.0D+0) -c +c c %------------------------% c | Local Scalars & Arrays | c %------------------------% c logical select(1) integer j, msglvl - Complex*16 + Complex*16 & vl(1) Double precision & temp @@ -151,14 +151,14 @@ subroutine zneigh (rnorm, n, h, ldh, ritz, bounds, c | External Subroutines | c %----------------------% c - external zlacpy, zlahqr, ztrevc, zcopy, - & zdscal, zmout, zvout, second + external zlacpy, zlahqr, ztrevc, zcopy, + & zdscal, zmout, zvout, arscnd c c %--------------------% c | External Functions | c %--------------------% c - Double precision + Double precision & dznrm2 external dznrm2 c @@ -171,19 +171,19 @@ subroutine zneigh (rnorm, n, h, ldh, ritz, bounds, c | & message level for debugging | c %-------------------------------% c - call second (t0) + call arscnd (t0) msglvl = mceigh -c +c if (msglvl .gt. 2) then - call zmout (logfil, n, n, h, ldh, ndigit, + call zmout (logfil, n, n, h, ldh, ndigit, & '_neigh: Entering upper Hessenberg matrix H ') end if -c +c c %----------------------------------------------------------% c | 1. Compute the eigenvalues, the last components of the | c | corresponding Schur vectors and the full Schur form T | c | of the current upper Hessenberg matrix H. | -c | zlahqr returns the full Schur form of H | +c | zlahqr returns the full Schur form of H | c | in WORKL(1:N**2), and the Schur vectors in q. | c %----------------------------------------------------------% c @@ -205,7 +205,7 @@ subroutine zneigh (rnorm, n, h, ldh, ritz, bounds, c | eigenvectors. | c %----------------------------------------------------------% c - call ztrevc ('Right', 'Back', select, n, workl, n, vl, n, q, + call ztrevc ('Right', 'Back', select, n, workl, n, vl, n, q, & ldq, n, n, workl(n*n+1), rwork, ierr) c if (ierr .ne. 0) go to 9000 @@ -244,7 +244,7 @@ subroutine zneigh (rnorm, n, h, ldh, ritz, bounds, & '_neigh: Ritz estimates for the eigenvalues of H') end if c - call second(t1) + call arscnd(t1) tceigh = tceigh + (t1 - t0) c 9000 continue diff --git a/Toolbox/arpack-src/zneupd.f b/Toolbox/arpack-src/zneupd.f index da8e3614b..92e7dc998 100644 --- a/Toolbox/arpack-src/zneupd.f +++ b/Toolbox/arpack-src/zneupd.f @@ -1,48 +1,48 @@ c\BeginDoc -c -c\Name: zneupd -c -c\Description: -c This subroutine returns the converged approximations to eigenvalues -c of A*z = lambda*B*z and (optionally): -c -c (1) The corresponding approximate eigenvectors; -c -c (2) An orthonormal basis for the associated approximate -c invariant subspace; -c -c (3) Both. -c -c There is negligible additional cost to obtain eigenvectors. An orthonormal +c +c\Name: zneupd +c +c\Description: +c This subroutine returns the converged approximations to eigenvalues +c of A*z = lambda*B*z and (optionally): +c +c (1) The corresponding approximate eigenvectors; +c +c (2) An orthonormal basis for the associated approximate +c invariant subspace; +c +c (3) Both. +c +c There is negligible additional cost to obtain eigenvectors. An orthonormal c basis is always computed. There is an additional storage cost of n*nev -c if both are requested (in this case a separate array Z must be supplied). +c if both are requested (in this case a separate array Z must be supplied). c c The approximate eigenvalues and eigenvectors of A*z = lambda*B*z c are derived from approximate eigenvalues and eigenvectors of c of the linear operator OP prescribed by the MODE selection in the c call to ZNAUPD. ZNAUPD must be called before this routine is called. c These approximate eigenvalues and vectors are commonly called Ritz -c values and Ritz vectors respectively. They are referred to as such -c in the comments that follow. The computed orthonormal basis for the -c invariant subspace corresponding to these Ritz values is referred to as a -c Schur basis. -c +c values and Ritz vectors respectively. They are referred to as such +c in the comments that follow. The computed orthonormal basis for the +c invariant subspace corresponding to these Ritz values is referred to as a +c Schur basis. +c c The definition of OP as well as other terms and the relation of computed c Ritz values and vectors of OP with respect to the given problem -c A*z = lambda*B*z may be found in the header of ZNAUPD. For a brief +c A*z = lambda*B*z may be found in the header of ZNAUPD. For a brief c description, see definitions of IPARAM(7), MODE and WHICH in the c documentation of ZNAUPD. c c\Usage: -c call zneupd -c ( RVEC, HOWMNY, SELECT, D, Z, LDZ, SIGMA, WORKEV, BMAT, -c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, +c call zneupd +c ( RVEC, HOWMNY, SELECT, D, Z, LDZ, SIGMA, WORKEV, BMAT, +c N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, WORKD, c WORKL, LWORKL, RWORK, INFO ) c c\Arguments: c RVEC LOGICAL (INPUT) c Specifies whether a basis for the invariant subspace corresponding -c to the converged Ritz value approximations for the eigenproblem +c to the converged Ritz value approximations for the eigenproblem c A*z = lambda*B*z is computed. c c RVEC = .FALSE. Compute Ritz values only. @@ -51,7 +51,7 @@ c See Remarks below. c c HOWMNY Character*1 (INPUT) -c Specifies the form of the basis for the invariant subspace +c Specifies the form of the basis for the invariant subspace c corresponding to the converged Ritz values that is to be computed. c c = 'A': Compute NEV Ritz vectors; @@ -62,34 +62,34 @@ c SELECT Logical array of dimension NCV. (INPUT) c If HOWMNY = 'S', SELECT specifies the Ritz vectors to be c computed. To select the Ritz vector corresponding to a -c Ritz value D(j), SELECT(j) must be set to .TRUE.. -c If HOWMNY = 'A' or 'P', SELECT need not be initialized +c Ritz value D(j), SELECT(j) must be set to .TRUE.. +c If HOWMNY = 'A' or 'P', SELECT need not be initialized c but it is used as internal workspace. c c D Complex*16 array of dimension NEV+1. (OUTPUT) -c On exit, D contains the Ritz approximations +c On exit, D contains the Ritz approximations c to the eigenvalues lambda for A*z = lambda*B*z. c c Z Complex*16 N by NEV array (OUTPUT) -c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of -c Z represents approximate eigenvectors (Ritz vectors) corresponding +c On exit, if RVEC = .TRUE. and HOWMNY = 'A', then the columns of +c Z represents approximate eigenvectors (Ritz vectors) corresponding c to the NCONV=IPARAM(5) Ritz values for eigensystem c A*z = lambda*B*z. c c If RVEC = .FALSE. or HOWMNY = 'P', then Z is NOT REFERENCED. c -c NOTE: If if RVEC = .TRUE. and a Schur basis is not required, -c the array Z may be set equal to first NEV+1 columns of the Arnoldi -c basis array V computed by ZNAUPD. In this case the Arnoldi basis +c NOTE: If if RVEC = .TRUE. and a Schur basis is not required, +c the array Z may be set equal to first NEV+1 columns of the Arnoldi +c basis array V computed by ZNAUPD. In this case the Arnoldi basis c will be destroyed and overwritten with the eigenvector basis. c c LDZ Integer. (INPUT) c The leading dimension of the array Z. If Ritz vectors are -c desired, then LDZ .ge. max( 1, N ) is required. +c desired, then LDZ .ge. max( 1, N ) is required. c In any case, LDZ .ge. 1 is required. c c SIGMA Complex*16 (INPUT) -c If IPARAM(7) = 3 then SIGMA represents the shift. +c If IPARAM(7) = 3 then SIGMA represents the shift. c Not referenced if IPARAM(7) = 1 or 2. c c WORKEV Complex*16 work array of dimension 2*NCV. (WORKSPACE) @@ -97,12 +97,12 @@ c **** The remaining arguments MUST be the same as for the **** c **** call to ZNAUPD that was just completed. **** c -c NOTE: The remaining arguments +c NOTE: The remaining arguments c -c BMAT, N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, -c WORKD, WORKL, LWORKL, RWORK, INFO +c BMAT, N, WHICH, NEV, TOL, RESID, NCV, V, LDV, IPARAM, IPNTR, +c WORKD, WORKL, LWORKL, RWORK, INFO c -c must be passed directly to ZNEUPD following the last call +c must be passed directly to ZNEUPD following the last call c to ZNAUPD. These arguments MUST NOT BE MODIFIED between c the the last call to ZNAUPD and the call to ZNEUPD. c @@ -128,7 +128,7 @@ c WORKL(1:ncv*ncv+2*ncv) contains information obtained in c znaupd. They are not changed by zneupd. c WORKL(ncv*ncv+2*ncv+1:3*ncv*ncv+4*ncv) holds the -c untransformed Ritz values, the untransformed error estimates of +c untransformed Ritz values, the untransformed error estimates of c the Ritz values, the upper triangular matrix for H, and the c associated matrix representation of the invariant subspace for H. c @@ -187,18 +187,18 @@ c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), c pp 357-385. -c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly +c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly c Restarted Arnoldi Iteration", Rice University Technical Report c TR95-13, Department of Computational and Applied Mathematics. c 3. B. Nour-Omid, B. N. Parlett, T. Ericsson and P. S. Jensen, c "How to Implement the Spectral Transformation", Math Comp., -c Vol. 48, No. 178, April, 1987 pp. 664-673. +c Vol. 48, No. 178, April, 1987 pp. 664-673. c c\Routines called: c ivout ARPACK utility routine that prints integers. c zmout ARPACK utility routine that prints matrices c zvout ARPACK utility routine that prints vectors. -c zgeqr2 LAPACK routine that computes the QR factorization of +c zgeqr2 LAPACK routine that computes the QR factorization of c a matrix. c zlacpy LAPACK matrix copy routine. c zlahqr LAPACK routine that computes the Schur form of a @@ -207,7 +207,7 @@ c ztrevc LAPACK routine to compute the eigenvectors of a matrix c in upper triangular form. c ztrsen LAPACK routine that re-orders the Schur form. -c zunm2r LAPACK routine that applies an orthogonal matrix in +c zunm2r LAPACK routine that applies an orthogonal matrix in c factored form. c dlamch LAPACK routine that determines machine constants. c ztrmm Level 3 BLAS matrix times an upper triangular matrix. @@ -219,7 +219,7 @@ c c\Remarks c -c 1. Currently only HOWMNY = 'A' and 'P' are implemented. +c 1. Currently only HOWMNY = 'A' and 'P' are implemented. c c 2. Schur vectors are an orthogonal representation for the basis of c Ritz vectors. Thus, their numerical properties are often superior. @@ -227,16 +227,16 @@ c A * V(:,1:IPARAM(5)) = V(:,1:IPARAM(5)) * T, and c transpose( V(:,1:IPARAM(5)) ) * V(:,1:IPARAM(5)) = I c are approximately satisfied. -c Here T is the leading submatrix of order IPARAM(5) of the -c upper triangular matrix stored workl(ipntr(12)). +c Here T is the leading submatrix of order IPARAM(5) of the +c upper triangular matrix stored workl(ipntr(12)). c c\Authors c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University -c Chao Yang Houston, Texas -c Dept. of Computational & -c Applied Mathematics -c Rice University +c Chao Yang Houston, Texas +c Dept. of Computational & +c Applied Mathematics +c Rice University c Houston, Texas c c\SCCS Information: @(#) @@ -256,8 +256,8 @@ subroutine zneupd(rvec , howmny, select, d , c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -266,9 +266,9 @@ subroutine zneupd(rvec , howmny, select, d , character bmat, howmny, which*2 logical rvec integer info, ldz, ldv, lworkl, n, ncv, nev - Complex*16 + Complex*16 & sigma - Double precision + Double precision & tol c c %-----------------% @@ -281,7 +281,7 @@ subroutine zneupd(rvec , howmny, select, d , & rwork(ncv) Complex*16 & d(nev) , resid(n) , v(ldv,ncv), - & z(ldz, nev), + & z(ldz, nev), & workd(3*n) , workl(lworkl), workev(2*ncv) c c %------------% @@ -301,7 +301,7 @@ subroutine zneupd(rvec , howmny, select, d , & invsub, iuptri, iwev , j , ldh , ldq , & mode , msglvl, ritz , wr , k , irz , & ibd , outncv, iq , np , numcnv, jj , - & ishift + & ishift, nconv2 Complex*16 & rnorm, temp, vl(1) Double precision @@ -315,7 +315,7 @@ subroutine zneupd(rvec , howmny, select, d , external zcopy , zgeru, zgeqr2, zlacpy, zmout, & zunm2r, ztrmm, zvout, ivout, & zlahqr -c +c c %--------------------% c | External Functions | c %--------------------% @@ -325,13 +325,13 @@ subroutine zneupd(rvec , howmny, select, d , external dznrm2, dlamch, dlapy2 c Complex*16 - & zdotc - external zdotc + & zzdotc + external zzdotc c c %-----------------------% c | Executable Statements | c %-----------------------% -c +c c %------------------------% c | Set default parameters | c %------------------------% @@ -362,7 +362,7 @@ subroutine zneupd(rvec , howmny, select, d , ierr = -1 else if (nev .le. 0) then ierr = -2 - else if (ncv .le. nev .or. ncv .gt. n) then + else if (ncv .le. nev+1 .or. ncv .gt. n) then ierr = -3 else if (which .ne. 'LM' .and. & which .ne. 'SM' .and. @@ -382,12 +382,12 @@ subroutine zneupd(rvec , howmny, select, d , else if (howmny .eq. 'S' ) then ierr = -12 end if -c +c if (mode .eq. 1 .or. mode .eq. 2) then type = 'REGULR' else if (mode .eq. 3 ) then type = 'SHIFTI' - else + else ierr = -10 end if if (mode .eq. 1 .and. bmat .eq. 'G') ierr = -11 @@ -400,7 +400,7 @@ subroutine zneupd(rvec , howmny, select, d , info = ierr go to 9000 end if -c +c c %--------------------------------------------------------% c | Pointer into WORKL for address of H, RITZ, WORKEV, Q | c | etc... and the remaining workspace. | @@ -428,7 +428,7 @@ subroutine zneupd(rvec , howmny, select, d , c | subspace for H. | c | GRAND total of NCV * ( 3 * NCV + 4 ) locations. | c %-----------------------------------------------------------% -c +c ih = ipntr(5) ritz = ipntr(6) iq = ipntr(7) @@ -516,15 +516,15 @@ subroutine zneupd(rvec , howmny, select, d , do 11 j = 1,ncv rtemp = max(eps23, & dlapy2 ( dble(workl(irz+ncv-j)), - & dimag(workl(irz+ncv-j)) )) + & aimag(workl(irz+ncv-j)) )) jj = workl(bounds + ncv - j) if (numcnv .lt. nconv .and. & dlapy2( dble(workl(ibd+jj-1)), - & dimag(workl(ibd+jj-1)) ) + & aimag(workl(ibd+jj-1)) ) & .le. tol*rtemp) then select(jj) = .true. numcnv = numcnv + 1 - if (jj .gt. nev) reord = .true. + if (jj .gt. nconv) reord = .true. endif 11 continue c @@ -536,9 +536,9 @@ subroutine zneupd(rvec , howmny, select, d , c %-----------------------------------------------------------% c if (msglvl .gt. 2) then - call ivout(logfil, 1, numcnv, ndigit, + call ivout(logfil, 1, [numcnv], ndigit, & '_neupd: Number of specified eigenvalues') - call ivout(logfil, 1, nconv, ndigit, + call ivout(logfil, 1, [nconv], ndigit, & '_neupd: Number of "converged" eigenvalues') end if c @@ -555,10 +555,10 @@ subroutine zneupd(rvec , howmny, select, d , c %-------------------------------------------------------% c call zcopy(ldh*ncv, workl(ih), 1, workl(iuptri), 1) - call zlaset('All', ncv, ncv , + call zlaset('All', ncv, ncv , & zero , one, workl(invsub), & ldq) - call zlahqr(.true., .true. , ncv , + call zlahqr(.true., .true. , ncv , & 1 , ncv , workl(iuptri), & ldh , workl(iheig) , 1 , & ncv , workl(invsub), ldq , @@ -577,7 +577,7 @@ subroutine zneupd(rvec , howmny, select, d , call zvout (logfil, ncv, workl(ihbds), ndigit, & '_neupd: Last row of the Schur vector matrix') if (msglvl .gt. 3) then - call zmout (logfil , ncv, ncv , + call zmout (logfil , ncv, ncv , & workl(iuptri), ldh, ndigit, & '_neupd: The upper triangular matrix ') end if @@ -592,9 +592,13 @@ subroutine zneupd(rvec , howmny, select, d , call ztrsen('None' , 'V' , select , & ncv , workl(iuptri), ldh , & workl(invsub), ldq , workl(iheig), - & nconv , conds , sep , + & nconv2 , conds , sep , & workev , ncv , ierr) c + if (nconv2 .lt. nconv) then + nconv = nconv2 + end if + if (ierr .eq. 1) then info = 1 go to 9000 @@ -621,7 +625,7 @@ subroutine zneupd(rvec , howmny, select, d , c call zcopy(ncv , workl(invsub+ncv-1), ldq, & workl(ihbds), 1) -c +c c %--------------------------------------------% c | Place the computed eigenvalues of H into D | c | if a spectral transformation was not used. | @@ -647,7 +651,7 @@ subroutine zneupd(rvec , howmny, select, d , c | * Postmultiply Z by R. | c | The N by NCONV matrix Z is now a matrix representation | c | of the approximate invariant subspace associated with | -c | the Ritz values in workl(iheig). The first NCONV | +c | the Ritz values in workl(iheig). The first NCONV | c | columns of V are now approximate Schur vectors | c | associated with the upper triangular matrix of order | c | NCONV in workl(iuptri). | @@ -670,7 +674,7 @@ subroutine zneupd(rvec , howmny, select, d , c | matrix consisting of plus or minus ones. | c %---------------------------------------------------% c - if ( dble( workl(invsub+(j-1)*ldq+j-1) ) .lt. + if ( dble( workl(invsub+(j-1)*ldq+j-1) ) .lt. & dble(zero) ) then call zscal(nconv, -one, workl(iuptri+j-1), ldq) call zscal(nconv, -one, workl(iuptri+(j-1)*ldq), 1) @@ -726,8 +730,8 @@ subroutine zneupd(rvec , howmny, select, d , c | upper triangular, thus the length of the | c | inner product can be set to j. | c %------------------------------------------% -c - workev(j) = zdotc(j, workl(ihbds), 1, +c + workev(j) = zzdotc(j, workl(ihbds), 1, & workl(invsub+(j-1)*ldq), 1) 40 continue c @@ -746,7 +750,7 @@ subroutine zneupd(rvec , howmny, select, d , c %---------------------------------------% c | Copy Ritz estimates into workl(ihbds) | c %---------------------------------------% -c +c call zcopy(nconv, workev, 1, workl(ihbds), 1) c c %----------------------------------------------% @@ -758,7 +762,7 @@ subroutine zneupd(rvec , howmny, select, d , & 'Non-unit', n , nconv , & one , workl(invsub), ldq , & z , ldz) - end if + end if c else c @@ -781,25 +785,25 @@ subroutine zneupd(rvec , howmny, select, d , c if (type .eq. 'REGULR') then c - if (rvec) + if (rvec) & call zscal(ncv, rnorm, workl(ihbds), 1) -c +c else -c +c c %---------------------------------------% c | A spectral transformation was used. | c | * Determine the Ritz estimates of the | c | Ritz values in the original system. | c %---------------------------------------% c - if (rvec) + if (rvec) & call zscal(ncv, rnorm, workl(ihbds), 1) -c +c do 50 k=1, ncv temp = workl(iheig+k-1) workl(ihbds+k-1) = workl(ihbds+k-1) / temp / temp 50 continue -c +c end if c c %-----------------------------------------------------------% @@ -809,7 +813,7 @@ subroutine zneupd(rvec , howmny, select, d , c | NOTES: | c | *The Ritz vectors are not affected by the transformation. | c %-----------------------------------------------------------% -c +c if (type .eq. 'SHIFTI') then do 60 k=1, nconv d(k) = one / workl(iheig+k-1) + sigma @@ -864,7 +868,7 @@ subroutine zneupd(rvec , howmny, select, d , 9000 continue c return -c +c c %---------------% c | End of zneupd| c %---------------% diff --git a/Toolbox/arpack-src/zngets.f b/Toolbox/arpack-src/zngets.f index 903eacb85..e7d243349 100644 --- a/Toolbox/arpack-src/zngets.f +++ b/Toolbox/arpack-src/zngets.f @@ -2,9 +2,9 @@ c c\Name: zngets c -c\Description: +c\Description: c Given the eigenvalues of the upper Hessenberg matrix H, -c computes the NP shifts AMU that are zeros of the polynomial of +c computes the NP shifts AMU that are zeros of the polynomial of c degree NP which filters out components of the unwanted eigenvectors c corresponding to the AMU's based on some given criteria. c @@ -40,8 +40,8 @@ c On INPUT, RITZ contains the the eigenvalues of H. c On OUTPUT, RITZ are sorted so that the unwanted c eigenvalues are in the first NP locations and the wanted -c portion is in the last KEV locations. When exact shifts are -c selected, the unwanted part corresponds to the shifts to +c portion is in the last KEV locations. When exact shifts are +c selected, the unwanted part corresponds to the shifts to c be applied. Also, if ISHIFT .eq. 1, the unwanted eigenvalues c are further sorted so that the ones with largest Ritz values c are first. @@ -49,7 +49,7 @@ c BOUNDS Complex*16 array of length KEV+NP. (INPUT/OUTPUT) c Error bounds corresponding to the ordering in RITZ. c -c +c c c\EndDoc c @@ -63,16 +63,16 @@ c\Routines called: c zsortc ARPACK sorting routine. c ivout ARPACK utility routine that prints integers. -c second ARPACK utility routine for timing. +c arscnd ARPACK utility routine for timing. c zvout ARPACK utility routine that prints vectors. c c\Author c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c\SCCS Information: @(#) c FILE: ngets.F SID: 2.2 DATE OF SID: 4/20/96 RELEASE: 2 @@ -91,8 +91,8 @@ subroutine zngets ( ishift, which, kev, np, ritz, bounds) c | Include files for debugging and timing information | c %----------------------------------------------------% c - include 'debug.fi' - include 'stat.fi' + include 'debug.h' + include 'stat.h' c c %------------------% c | Scalar Arguments | @@ -126,7 +126,7 @@ subroutine zngets ( ishift, which, kev, np, ritz, bounds) c | External Subroutines | c %----------------------% c - external zvout, zsortc, second + external zvout, zsortc, arscnd c c %-----------------------% c | Executable Statements | @@ -136,14 +136,14 @@ subroutine zngets ( ishift, which, kev, np, ritz, bounds) c | Initialize timing statistics | c | & message level for debugging | c %-------------------------------% -c - call second (t0) +c + call arscnd (t0) msglvl = mcgets -c +c call zsortc (which, .true., kev+np, ritz, bounds) -c +c if ( ishift .eq. 1 ) then -c +c c %-------------------------------------------------------% c | Sort the unwanted Ritz values used as shifts so that | c | the ones with largest Ritz estimates are first | @@ -152,27 +152,27 @@ subroutine zngets ( ishift, which, kev, np, ritz, bounds) c | are applied in subroutine znapps. | c | Be careful and use 'SM' since we want to sort BOUNDS! | c %-------------------------------------------------------% -c +c call zsortc ( 'SM', .true., np, bounds, ritz ) c end if -c - call second (t1) +c + call arscnd (t1) tcgets = tcgets + (t1 - t0) c if (msglvl .gt. 0) then - call ivout (logfil, 1, kev, ndigit, '_ngets: KEV is') - call ivout (logfil, 1, np, ndigit, '_ngets: NP is') + call ivout (logfil, 1, [kev], ndigit, '_ngets: KEV is') + call ivout (logfil, 1, [np], ndigit, '_ngets: NP is') call zvout (logfil, kev+np, ritz, ndigit, & '_ngets: Eigenvalues of current H matrix ') - call zvout (logfil, kev+np, bounds, ndigit, + call zvout (logfil, kev+np, bounds, ndigit, & '_ngets: Ritz estimates of the current KEV+NP Ritz values') end if -c +c return -c +c c %---------------% c | End of zngets | c %---------------% -c +c end diff --git a/Toolbox/arpack-src/zsortc.f b/Toolbox/arpack-src/zsortc.f index 7dc688a06..6ea37a42f 100644 --- a/Toolbox/arpack-src/zsortc.f +++ b/Toolbox/arpack-src/zsortc.f @@ -3,9 +3,9 @@ c\Name: zsortc c c\Description: -c Sorts the Complex*16 array in X into the order +c Sorts the Complex*16 array in X into the order c specified by WHICH and optionally applies the permutation to the -c Double precision array Y. +c Double precision array Y. c c\Usage: c call zsortc @@ -15,7 +15,7 @@ c WHICH Character*2. (Input) c 'LM' -> sort X into increasing order of magnitude. c 'SM' -> sort X into decreasing order of magnitude. -c 'LR' -> sort X with real(X) in increasing algebraic order +c 'LR' -> sort X with real(X) in increasing algebraic order c 'SR' -> sort X with real(X) in decreasing algebraic order c 'LI' -> sort X with imag(X) in increasing algebraic order c 'SI' -> sort X with imag(X) in decreasing algebraic order @@ -45,9 +45,9 @@ c Danny Sorensen Phuong Vu c Richard Lehoucq CRPC / Rice University c Dept. of Computational & Houston, Texas -c Applied Mathematics -c Rice University -c Houston, Texas +c Applied Mathematics +c Rice University +c Houston, Texas c c Adapted from the sort routine in LANSO. c @@ -72,7 +72,7 @@ subroutine zsortc (which, apply, n, x, y) c | Array Arguments | c %-----------------% c - Complex*16 + Complex*16 & x(0:n-1), y(0:n-1) c c %---------------% @@ -80,9 +80,9 @@ subroutine zsortc (which, apply, n, x, y) c %---------------% c integer i, igap, j - Complex*16 + Complex*16 & temp - Double precision + Double precision & temp1, temp2 c c %--------------------% @@ -96,14 +96,14 @@ subroutine zsortc (which, apply, n, x, y) c | Intrinsic Functions | c %--------------------% Intrinsic - & dble, dimag + & dble, aimag c c %-----------------------% c | Executable Statements | c %-----------------------% c igap = n / 2 -c +c if (which .eq. 'LM') then c c %--------------------------------------------% @@ -119,8 +119,8 @@ subroutine zsortc (which, apply, n, x, y) c if (j.lt.0) go to 30 c - temp1 = dlapy2(dble(x(j)),dimag(x(j))) - temp2 = dlapy2(dble(x(j+igap)),dimag(x(j+igap))) + temp1 = dlapy2(dble(x(j)),aimag(x(j))) + temp2 = dlapy2(dble(x(j+igap)),aimag(x(j+igap))) c if (temp1.gt.temp2) then temp = x(j) @@ -156,14 +156,14 @@ subroutine zsortc (which, apply, n, x, y) c if (j .lt. 0) go to 60 c - temp1 = dlapy2(dble(x(j)),dimag(x(j))) - temp2 = dlapy2(dble(x(j+igap)),dimag(x(j+igap))) + temp1 = dlapy2(dble(x(j)),aimag(x(j))) + temp2 = dlapy2(dble(x(j+igap)),aimag(x(j+igap))) c if (temp1.lt.temp2) then temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -177,7 +177,7 @@ subroutine zsortc (which, apply, n, x, y) 60 continue igap = igap / 2 go to 40 -c +c else if (which .eq. 'LR') then c c %------------------------------------------------% @@ -197,7 +197,7 @@ subroutine zsortc (which, apply, n, x, y) temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -211,7 +211,7 @@ subroutine zsortc (which, apply, n, x, y) 90 continue igap = igap / 2 go to 70 -c +c else if (which .eq. 'SR') then c c %------------------------------------------------% @@ -230,7 +230,7 @@ subroutine zsortc (which, apply, n, x, y) temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -244,7 +244,7 @@ subroutine zsortc (which, apply, n, x, y) 120 continue igap = igap / 2 go to 100 -c +c else if (which .eq. 'LI') then c c %--------------------------------------------% @@ -259,7 +259,7 @@ subroutine zsortc (which, apply, n, x, y) c if (j.lt.0) go to 150 c - if (dimag(x(j)).gt.dimag(x(j+igap))) then + if (aimag(x(j)).gt.aimag(x(j+igap))) then temp = x(j) x(j) = x(j+igap) x(j+igap) = temp @@ -277,7 +277,7 @@ subroutine zsortc (which, apply, n, x, y) 150 continue igap = igap / 2 go to 130 -c +c else if (which .eq. 'SI') then c c %---------------------------------------------% @@ -292,11 +292,11 @@ subroutine zsortc (which, apply, n, x, y) c if (j.lt.0) go to 180 c - if (dimag(x(j)).lt.dimag(x(j+igap))) then + if (aimag(x(j)).lt.aimag(x(j+igap))) then temp = x(j) x(j) = x(j+igap) x(j+igap) = temp -c +c if (apply) then temp = y(j) y(j) = y(j+igap) @@ -311,7 +311,7 @@ subroutine zsortc (which, apply, n, x, y) igap = igap / 2 go to 160 end if -c +c 9000 continue return c diff --git a/Toolbox/arpack-src/zstatn.f b/Toolbox/arpack-src/zstatn.f index 7766b33f4..ddc5240f3 100644 --- a/Toolbox/arpack-src/zstatn.f +++ b/Toolbox/arpack-src/zstatn.f @@ -13,8 +13,8 @@ subroutine zstatn c | See stat.doc for documentation | c %--------------------------------% c - include 'stat.fi' - + include 'stat.h' + c %-----------------------% c | Executable Statements | c %-----------------------% @@ -24,7 +24,7 @@ subroutine zstatn nrorth = 0 nitref = 0 nrstrt = 0 - + tcaupd = 0.0D+0 tcaup2 = 0.0D+0 tcaitr = 0.0D+0 @@ -35,13 +35,13 @@ subroutine zstatn titref = 0.0D+0 tgetv0 = 0.0D+0 trvec = 0.0D+0 - + c %----------------------------------------------------% c | User time including reverse communication overhead | c %----------------------------------------------------% tmvopx = 0.0D+0 tmvbx = 0.0D+0 - + return c c %---------------% diff --git a/Toolbox/arpack-src/zvout.f b/Toolbox/arpack-src/zvout.f index ac7e6f9fc..8c42eb890 100644 --- a/Toolbox/arpack-src/zvout.f +++ b/Toolbox/arpack-src/zvout.f @@ -63,21 +63,21 @@ SUBROUTINE ZVOUT( LOUT, N, CX, IDIGIT, IFMT ) DO 30 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF (K1.NE.N) THEN - WRITE( LOUT, 9998 )K1, K2, ( CX( I ), + WRITE( LOUT, 9998 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE - WRITE( LOUT, 9997 )K1, K2, ( CX( I ), - $ I = K1, K2 ) + WRITE( LOUT, 9997 )K1, K2, ( CX( I ), + $ I = K1, K2 ) END IF 30 CONTINUE ELSE IF( NDIGIT.LE.6 ) THEN DO 40 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF (K1.NE.N) THEN - WRITE( LOUT, 9988 )K1, K2, ( CX( I ), + WRITE( LOUT, 9988 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE - WRITE( LOUT, 9987 )K1, K2, ( CX( I ), + WRITE( LOUT, 9987 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 40 CONTINUE @@ -85,11 +85,11 @@ SUBROUTINE ZVOUT( LOUT, N, CX, IDIGIT, IFMT ) DO 50 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF (K1.NE.N) THEN - WRITE( LOUT, 9978 )K1, K2, ( CX( I ), + WRITE( LOUT, 9978 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE - WRITE( LOUT, 9977 )K1, K2, ( CX( I ), - $ I = K1, K2 ) + WRITE( LOUT, 9977 )K1, K2, ( CX( I ), + $ I = K1, K2 ) END IF 50 CONTINUE ELSE @@ -104,47 +104,47 @@ SUBROUTINE ZVOUT( LOUT, N, CX, IDIGIT, IFMT ) * ELSE IF( NDIGIT.LE.4 ) THEN - DO 70 K1 = 1, N, 4 + DO 70 K1 = 1, N, 4 K2 = MIN0( N, K1+3 ) IF ((K1+3).LE.N) THEN - WRITE( LOUT, 9958 )K1, K2, ( CX( I ), + WRITE( LOUT, 9958 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+3-N) .EQ. 1) THEN - WRITE( LOUT, 9957 )K1, K2, ( CX( I ), + WRITE( LOUT, 9957 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+3-N) .EQ. 2) THEN - WRITE( LOUT, 9956 )K1, K2, ( CX( I ), + WRITE( LOUT, 9956 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+3-N) .EQ. 1) THEN - WRITE( LOUT, 9955 )K1, K2, ( CX( I ), + WRITE( LOUT, 9955 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 70 CONTINUE ELSE IF( NDIGIT.LE.6 ) THEN - DO 80 K1 = 1, N, 3 + DO 80 K1 = 1, N, 3 K2 = MIN0( N, K1+2 ) IF ((K1+2).LE.N) THEN - WRITE( LOUT, 9948 )K1, K2, ( CX( I ), + WRITE( LOUT, 9948 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 1) THEN - WRITE( LOUT, 9947 )K1, K2, ( CX( I ), + WRITE( LOUT, 9947 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 2) THEN - WRITE( LOUT, 9946 )K1, K2, ( CX( I ), + WRITE( LOUT, 9946 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 80 CONTINUE ELSE IF( NDIGIT.LE.8 ) THEN - DO 90 K1 = 1, N, 3 + DO 90 K1 = 1, N, 3 K2 = MIN0( N, K1+2 ) IF ((K1+2).LE.N) THEN - WRITE( LOUT, 9938 )K1, K2, ( CX( I ), + WRITE( LOUT, 9938 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 1) THEN - WRITE( LOUT, 9937 )K1, K2, ( CX( I ), + WRITE( LOUT, 9937 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 2) THEN - WRITE( LOUT, 9936 )K1, K2, ( CX( I ), + WRITE( LOUT, 9936 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 90 CONTINUE @@ -152,10 +152,10 @@ SUBROUTINE ZVOUT( LOUT, N, CX, IDIGIT, IFMT ) DO 100 K1 = 1, N, 2 K2 = MIN0( N, K1+1 ) IF ((K1+2).LE.N) THEN - WRITE( LOUT, 9928 )K1, K2, ( CX( I ), + WRITE( LOUT, 9928 )K1, K2, ( CX( I ), $ I = K1, K2 ) ELSE IF ((K1+2-N) .EQ. 1) THEN - WRITE( LOUT, 9927 )K1, K2, ( CX( I ), + WRITE( LOUT, 9927 )K1, K2, ( CX( I ), $ I = K1, K2 ) END IF 100 CONTINUE @@ -171,12 +171,12 @@ SUBROUTINE ZVOUT( LOUT, N, CX, IDIGIT, IFMT ) * DISPLAY 4 SIGNIFICANT DIGITS * 9998 FORMAT( 1X, I4, ' - ', I4, ':', 1X, - $ 1P,2('(',D10.3,',',D10.3,') ') ) + $ 1P,2('(',D10.3,',',D10.3,') ') ) 9997 FORMAT( 1X, I4, ' - ', I4, ':', 1X, $ 1P,1('(',D10.3,',',D10.3,') ') ) * * DISPLAY 6 SIGNIFICANT DIGITS -* +* 9988 FORMAT( 1X, I4, ' - ', I4, ':', 1X, $ 1P,2('(',D12.5,',',D12.5,') ') ) 9987 FORMAT( 1X, I4, ' - ', I4, ':', 1X, @@ -192,7 +192,7 @@ SUBROUTINE ZVOUT( LOUT, N, CX, IDIGIT, IFMT ) * DISPLAY 13 SIGNIFICANT DIGITS * 9968 FORMAT( 1X, I4, ' - ', I4, ':', 1X, - $ 1P,1('(',D20.13,',',D20.13,') ') ) + $ 1P,1('(',D20.13,',',D20.13,') ') ) * *========================================================================= * FORMAT FOR 132 COLUMNS @@ -235,6 +235,6 @@ SUBROUTINE ZVOUT( LOUT, N, CX, IDIGIT, IFMT ) $ 1P,1('(',D20.13,',',D20.13,') ') ) * * -* +* 9994 FORMAT( 1X, ' ' ) END diff --git a/Toolbox/arpack-src/zzdotc.f b/Toolbox/arpack-src/zzdotc.f new file mode 100644 index 000000000..a98c34230 --- /dev/null +++ b/Toolbox/arpack-src/zzdotc.f @@ -0,0 +1,36 @@ + double complex function zzdotc(n,zx,incx,zy,incy) +c +c forms the dot product of a vector. +c jack dongarra, 3/11/78. +c modified 12/3/93, array(1) declarations changed to array(*) +c + double complex zx(*),zy(*),ztemp + integer i,incx,incy,ix,iy,n + ztemp = (0.0d0,0.0d0) + zzdotc = (0.0d0,0.0d0) + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +c +c code for unequal increments or equal increments +c not equal to 1 +c + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + ztemp = ztemp + conjg(zx(ix))*zy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + zzdotc = ztemp + return +c +c code for both increments equal to 1 +c + 20 do 30 i = 1,n + ztemp = ztemp + conjg(zx(i))*zy(i) + 30 continue + zzdotc = ztemp + return + end diff --git a/Toolbox/arpack_wrapper.cpp b/Toolbox/arpack_wrapper.cpp index 030d4079a..16667e75e 100644 --- a/Toolbox/arpack_wrapper.cpp +++ b/Toolbox/arpack_wrapper.cpp @@ -20,72 +20,6 @@ #include "arpack_wrapper.h" #include -int eig_solve(cx_vec& eigval, cx_mat& eigvec, const std::shared_ptr>& K, const unsigned num, const char* form) { - auto IDO = 0; - auto BMAT = 'I'; // standard eigenvalue problem A*x=lambda*x - auto N = static_cast(K->n_rows); - char WHICH[2]; - for(auto I = 0; I < 2; ++I) WHICH[I] = form[I]; - auto NEV = std::min(static_cast(num), N - 2); - auto TOL = 0.; - auto NCV = std::min(std::max(NEV + 2, 2 * NEV + 1), N); - auto LDV = N; - auto LWORKL = 3 * NCV * (NCV + 2); - auto INFO = 0; - - podarray IPARAM(11), IPNTR(14); - podarray RESID(N), V(N * uword(NCV)), WORKD(3llu * N), WORKL(LWORKL); - - IPARAM(0) = 1; // exact shift - IPARAM(2) = 1000; // maximum iteration - IPARAM(6) = 1; // mode 1: A*x=lambda*x - - while(IDO != 99) { - arma_fortran(arma_dnaupd)(&IDO, &BMAT, &N, WHICH, &NEV, &TOL, RESID.memptr(), &NCV, V.memptr(), &LDV, IPARAM.memptr(), IPNTR.memptr(), WORKD.memptr(), WORKL.memptr(), &LWORKL, &INFO); - if(IDO == 1 || IDO == -1) { - const vec X(WORKD.memptr() + IPNTR[0] - 1, N, false); - // ReSharper disable once CppInitializedValueIsAlwaysRewritten - // ReSharper disable once CppEntityAssignedButNoRead - vec Y(WORKD.memptr() + IPNTR[1] - 1, N, false); - Y = K * X; - } - } - - if(INFO != 0) return INFO; - - auto RVEC = 1; - auto HOWMNY = 'A'; - auto LDZ = N; - auto SIGMAR = 0.; - auto SIGMAI = 0.; - - podarray SELECT(NCV); - podarray DR(NEV + 1llu), DI(NEV + 1llu), Z(N * (NEV + 1llu)), WORKEV(3llu * NCV); - - arma_fortran(arma_dneupd)(&RVEC, &HOWMNY, SELECT.memptr(), DR.memptr(), DI.memptr(), Z.memptr(), &LDZ, &SIGMAR, &SIGMAI, WORKEV.memptr(), &BMAT, &N, WHICH, &NEV, &TOL, RESID.memptr(), &NCV, V.memptr(), &LDV, IPARAM.memptr(), IPNTR.memptr(), WORKD.memptr(), WORKL.memptr(), &LWORKL, &INFO); - - eigval.set_size(NEV); - eigvec.set_size(N, NEV); - - // get eigenvalues - for(uword I = 0; I < uword(NEV); ++I) eigval(I) = std::complex(DR(I), DI(I)); - - // get eigenvectors - for(uword I = 0; I < uword(NEV); ++I) { - if(I < NEV - 1llu && eigval[I] == std::conj(eigval[I + 1llu])) { - for(uword J = 0; J < uword(N); ++J) { - eigvec.at(J, I) = std::complex(Z[N * I + J], Z[N * I + N + J]); - eigvec.at(J, I + 1llu) = std::complex(Z[N * I + J], -Z[N * I + N + J]); - } - ++I; - } - else if(I == NEV - 1llu && std::complex(eigval[I]).imag() != 0.) for(auto J = 0; J < N; ++J) eigvec.at(J, I) = std::complex(Z[N * I + J], Z[N * I + N + J]); - else for(auto J = 0; J < N; ++J) eigvec.at(J, I) = std::complex(Z[N * I + J], 0.); - } - - return INFO; -} - int eig_solve(vec& eigval, mat& eigvec, const std::shared_ptr>& K, const std::shared_ptr>& M, const unsigned num, const char* form) { auto IDO = 0; auto BMAT = 'G'; // generalized eigenvalue problem A*x=lambda*M*x @@ -126,9 +60,13 @@ int eig_solve(vec& eigval, mat& eigvec, const std::shared_ptr>& const vec X(WORKD.memptr() + IPNTR[0] - 1, N, false); Y = K * X; } + else if(0 != INFO) break; } - if(0 != INFO) return INFO; + if(0 != INFO) { + suanpan_error("arpack solver returns %d.\n", INFO); + return SUANPAN_FAIL; + } suanpan_debug("Arnoldi iteration counter: %d.\n", IPARAM(2)); @@ -154,7 +92,7 @@ int eig_solve(cx_vec& eigval, cx_mat& eigvec, const std::shared_ptr(num), N - 2); auto TOL = 0.; - auto NCV = std::min(std::max(NEV + 2, 2 * NEV + 1), N); + auto NCV = std::min(std::max(NEV + 3, 2 * NEV + 1), N); auto LDV = N; auto LWORKL = 3 * NCV * (NCV + 2); auto INFO = 0; @@ -164,14 +102,12 @@ int eig_solve(cx_vec& eigval, cx_mat& eigvec, const std::shared_ptrsolve(X); - first_solve = false; - } - else Y = K->solve(X); + Y = K->solve(X); } else if(1 == IDO) { const vec X(WORKD.memptr() + IPNTR[2] - 1, N, false); @@ -193,147 +125,14 @@ int eig_solve(cx_vec& eigval, cx_mat& eigvec, const std::shared_ptr SELECT(NCV); - podarray DR(NEV + 1llu), DI(NEV + 1llu), Z(N * (NEV + 1llu)), WORKEV(3llu * NCV); - - arma_fortran(arma_dneupd)(&RVEC, &HOWMNY, SELECT.memptr(), DR.memptr(), DI.memptr(), Z.memptr(), &LDZ, &SIGMAR, &SIGMAI, WORKEV.memptr(), &BMAT, &N, WHICH, &NEV, &TOL, RESID.memptr(), &NCV, V.memptr(), &LDV, IPARAM.memptr(), IPNTR.memptr(), WORKD.memptr(), WORKL.memptr(), &LWORKL, &INFO); - - eigval.set_size(NEV); - eigvec.set_size(N, NEV); - - // get eigenvalues - for(uword I = 0; I < uword(NEV); ++I) eigval(I) = std::complex(DR(I), DI(I)); - - // get eigenvectors - for(uword I = 0; I < uword(NEV); ++I) { - if(I < NEV - 1llu && eigval[I] == std::conj(eigval[I + 1llu])) { - for(uword J = 0; J < uword(N); ++J) { - eigvec.at(J, I) = std::complex(Z[N * I + J], Z[N * I + N + J]); - eigvec.at(J, I + 1llu) = std::complex(Z[N * I + J], -Z[N * I + N + J]); - } - ++I; - } - else if(I == NEV - 1llu && std::complex(eigval[I]).imag() != 0.) for(auto J = 0; J < N; ++J) eigvec.at(J, I) = std::complex(Z[N * I + J], Z[N * I + N + J]); - else for(auto J = 0; J < N; ++J) eigvec.at(J, I) = std::complex(Z[N * I + J], 0.); - } - - return INFO; -} - -int eig_solve(vec& eigval, mat& eigvec, const std::shared_ptr>& K, const std::shared_ptr>& KG) { - auto IDO = 0; - auto BMAT = 'G'; // generalized eigenvalue problem A*x=lambda*M*x - auto N = static_cast(K->n_cols); - char WHICH[2] = {'S', 'M'}; - auto NEV = 1; - auto TOL = 0.; - auto NCV = std::min(2 * NEV, N); - auto LDV = N; - auto LWORKL = 2 * NCV * (NCV + 8); - auto INFO = 0; - - podarray IPARAM(11), IPNTR(14); - podarray RESID(N), V(uword(N) * uword(NCV)), WORKD(5 * uword(N)), WORKL(LWORKL); - - IPARAM(0) = 1; // exact shift - IPARAM(2) = 1000; // maximum iteration - IPARAM(6) = 4; // mode 4: K*x=lambda*KG*x - - auto SIGMA = -1.; - - KG *= SIGMA; - - // for buckling analysis KG from FEM is actually -KG in ARPACK - KG += K; - - while(99 != IDO) { - arma_fortran(arma_dsaupd)(&IDO, &BMAT, &N, WHICH, &NEV, &TOL, RESID.memptr(), &NCV, V.memptr(), &LDV, IPARAM.memptr(), IPNTR.memptr(), WORKD.memptr(), WORKL.memptr(), &LWORKL, &INFO); - // ReSharper disable once CppEntityAssignedButNoRead - if(vec Y(WORKD.memptr() + IPNTR[1] - 1, N, false); -1 == IDO) { - vec X(WORKD.memptr() + IPNTR[0] - 1, N, false); - X = K * X; - Y = KG->solve(X); - } - else if(1 == IDO) { - const vec X(WORKD.memptr() + IPNTR[2] - 1, N, false); - Y = KG->solve(X); - } - else if(2 == IDO) { - const vec X(WORKD.memptr() + IPNTR[0] - 1, N, false); - Y = K * X; - } - } - - if(0 != INFO) return INFO; - - suanpan_debug("Arnoldi iteration counter: %d.\n", IPARAM(2)); - - auto RVEC = 1; - auto HOWMNY = 'A'; - auto LDZ = N; - - podarray SELECT(NCV); - - eigval.set_size(NEV); - eigvec.set_size(N, NEV); - - arma_fortran(arma_dseupd)(&RVEC, &HOWMNY, SELECT.memptr(), eigval.memptr(), eigvec.memptr(), &LDZ, &SIGMA, &BMAT, &N, WHICH, &NEV, &TOL, RESID.memptr(), &NCV, V.memptr(), &LDV, IPARAM.memptr(), IPNTR.memptr(), WORKD.memptr(), WORKL.memptr(), &LWORKL, &INFO); - - return INFO; -} - -int eig_solve(cx_vec& eigval, cx_mat& eigvec, const std::shared_ptr>& K, const std::shared_ptr>& KG) { - auto IDO = 0; - auto BMAT = 'G'; // standard eigenvalue problem A*x=lambda*x - auto N = static_cast(K->n_rows); - char WHICH[2] = {'L', 'M'}; - auto NEV = 1; - auto TOL = 0.; - auto NCV = std::min(std::max(NEV + 2, 2 * NEV + 1), N); - auto LDV = N; - auto LWORKL = 3 * NCV * (NCV + 2); - auto INFO = 0; - - podarray IPARAM(11), IPNTR(14); - podarray RESID(N), V(N * uword(NCV)), WORKD(3llu * N), WORKL(LWORKL); - - IPARAM(0) = 1; // exact shift - IPARAM(2) = 1000; // maximum iteration - IPARAM(6) = 3; // mode 1: K*x=lambda*KG*x - - auto SIGMAR = -1.; - auto SIGMAI = 0.; - - K -= KG; - - while(99 != IDO) { - arma_fortran(arma_dnaupd)(&IDO, &BMAT, &N, WHICH, &NEV, &TOL, RESID.memptr(), &NCV, V.memptr(), &LDV, IPARAM.memptr(), IPNTR.memptr(), WORKD.memptr(), WORKL.memptr(), &LWORKL, &INFO); - // ReSharper disable once CppEntityAssignedButNoRead - if(vec Y(WORKD.memptr() + IPNTR[1] - 1, N, false); -1 == IDO) { - vec X(WORKD.memptr() + IPNTR[0] - 1, N, false); - X = KG * X; - Y = K->solve(X); - } - else if(1 == IDO) { - const vec X(WORKD.memptr() + IPNTR[2] - 1, N, false); - Y = K->solve(X); - } - else if(2 == IDO) { - const vec X(WORKD.memptr() + IPNTR[0] - 1, N, false); - Y = KG * X; - } + if(0 != INFO) { + suanpan_error("arpack solver returns %d.\n", INFO); + return SUANPAN_FAIL; } - if(INFO != 0) return INFO; - auto RVEC = 1; auto HOWMNY = 'A'; auto LDZ = N; diff --git a/Toolbox/arpack_wrapper.h b/Toolbox/arpack_wrapper.h index b56766904..068f4ba59 100644 --- a/Toolbox/arpack_wrapper.h +++ b/Toolbox/arpack_wrapper.h @@ -21,17 +21,8 @@ #include #include -// general matrix -int eig_solve(cx_vec&, cx_mat&, const std::shared_ptr>&, unsigned, const char* = "SM"); +int eig_solve(vec&, mat&, const std::shared_ptr>&, const std::shared_ptr>&, unsigned, const char* = "SM"); -// modal analysis -int eig_solve(vec& eigval, mat& eigvec, const std::shared_ptr>&, const std::shared_ptr>&, unsigned, const char* = "SM"); - -int eig_solve(cx_vec& eigval, cx_mat& eigvec, const std::shared_ptr>&, const std::shared_ptr>&, unsigned, const char* = "LM"); - -// buckling analysis -int eig_solve(vec& eigval, mat& eigvec, const std::shared_ptr>&, const std::shared_ptr>&); - -int eig_solve(cx_vec& eigval, cx_mat& eigvec, const std::shared_ptr>&, const std::shared_ptr>&); +int eig_solve(cx_vec&, cx_mat&, const std::shared_ptr>&, const std::shared_ptr>&, unsigned, const char* = "LM"); #endif diff --git a/Toolbox/commandParser.cpp b/Toolbox/commandParser.cpp index 0d64ba219..1fbb44a45 100644 --- a/Toolbox/commandParser.cpp +++ b/Toolbox/commandParser.cpp @@ -400,7 +400,7 @@ int process_command(const shared_ptr& model, istringstream& command) { auto flag = true; for(const auto& t_integrator : domain->get_integrator_pool()) - if(t_integrator->get_domain().lock() != nullptr) { + if(t_integrator->get_domain() != nullptr) { t_integrator->clear_status(); flag = false; } @@ -415,7 +415,7 @@ int process_command(const shared_ptr& model, istringstream& command) { auto flag = true; for(const auto& t_integrator : domain->get_integrator_pool()) - if(t_integrator->get_domain().lock() != nullptr) { + if(t_integrator->get_domain() != nullptr) { t_integrator->reset_status(); flag = false; } @@ -1129,6 +1129,12 @@ int set_property(const shared_ptr& domain, istringstream& command) { return SUANPAN_SUCCESS; } + if(is_equal(property_id, "linear_system")) { + domain->set_attribute(ModalAttribute::LinearSystem); + + return SUANPAN_SUCCESS; + } + if(domain->get_current_step_tag() == 0) return SUANPAN_SUCCESS; const auto& t_step = domain->get_current_step(); diff --git a/Toolbox/fext/CMakeLists.txt b/Toolbox/fext/CMakeLists.txt index 9b1b8348c..e49a207bf 100644 --- a/Toolbox/fext/CMakeLists.txt +++ b/Toolbox/fext/CMakeLists.txt @@ -9,7 +9,6 @@ include(../../Driver.cmake) add_library(${PROJECT_NAME} ${LIBRARY_TYPE} $ $ - $ $ $ $ diff --git a/Toolbox/lapack-ext/CMakeLists.txt b/Toolbox/lapack-ext/CMakeLists.txt deleted file mode 100644 index 7a794fe27..000000000 --- a/Toolbox/lapack-ext/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -cmake_minimum_required(VERSION 3.13.0) - -project(spmm Fortran) - -set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/../../Libs) - -file(GLOB SPMM_SRC "*.f") - -include(../../Driver.cmake) - -add_library(spmm_obj OBJECT ${SPMM_SRC}) - -add_library(${PROJECT_NAME} ${LIBRARY_TYPE} $) - -message(STATUS "SPMM Fortran_FLAGS: ${CMAKE_Fortran_FLAGS}") \ No newline at end of file diff --git a/Toolbox/lapack-ext/dspmm.f b/Toolbox/lapack-ext/dspmm.f deleted file mode 100644 index ef94bc4f0..000000000 --- a/Toolbox/lapack-ext/dspmm.f +++ /dev/null @@ -1,458 +0,0 @@ -* Purpose: To compute alpha*op(A,B)+beta*C, op(A,B) could be either -* A*B, -* B*A, -* A*B**T, -* B**T*A. -* -* \param[in] SIDE -* SIDE is CHARACTER*1 -* Select which problem to compute: -* 'L' C=alpha*B*A+beta*C, -* 'R' C=alpha*A*B+beta*C. -* -* \param[in] UPLO -* UPLO is CHARACTER*1 -* Select which part of A is stored: -* 'U' Upper Triangle, -* 'L' Lower Triangle. -* -* \param[in] TRAN -* TRAN is CHARACTER*1 -* Select if B is transverse: -* 'N' No transverse, -* 'T' Transverse. -* -* \param[in] M -* M is INTEGER -* The size of square matrix A. -* -* \param[in] N -* N is INTEGER -* Another dimension of matrix B. -* For SIDE='L', B=>(N,M), -* For SIDE='R', B=>(M,N). -* -* \param[in] ALPHA -* ALPHA is DOUBLEPRECISION -* The factor. -* -* \param[in] A -* A is DOUBLEPRECISION(*) array of DIMENSION ((M+1)*M/2) -* -* \param[in] B -* B is DOUBLEPRECISION(*,*) array of DIMENSION (M,N) or (N,M) -* -* \param[in] LDB -* LDB is INTEGER -* The leading dimension of matrix B, should be at least max(1,M) or max(1,N). -* -* \param[in] BETA -* BETA is DOUBLEPRECISION -* The factor. -* -* \param[in/out] C -* C is DOUBLEPRECISION(*,*) array of DIMENSION (M,N) or (N,M) -* -* \param[in] LDC -* LDC is INTEGER -* The leading dimension of matrix C, should be at least max(1,M) or max(1,N) based on SIDE. -* - SUBROUTINE DSPMM(SIDE,UPLO,TRAN,M,N,A,ALPHA,B,LDB,BETA,C,LDC) - - !...INPUT ARGUMENTS... - CHARACTER SIDE,UPLO,TRAN - INTEGER M,N,LDB,LDC - DOUBLEPRECISION ALPHA,BETA,A(*),B(LDB,*),C(LDC,*) - - !...TEMP VARIABLES... - INTEGER I,J,K,X,Y,Z,DIMA,DIMB,PTYPE,INFO - DOUBLEPRECISION TEMPA - LOGICAL S,U,T - - !...TWO CONSTANTS... - DOUBLEPRECISION ZERO,ONE - PARAMETER (ZERO=0.0D+0,ONE=1.0D+0) - - !...EXTERNAL SUBROUTINES... - LOGICAL LSAME - EXTERNAL LSAME - EXTERNAL XERBLA - - !...FLAGS... - S=LSAME(SIDE,'R') - U=LSAME(UPLO,'U') - T=LSAME(TRAN,'N') - - !...CHECK IF ACCEPTABLE ARGUMENTS ARE GIVEN... - INFO=0 - IF((.NOT.S).AND.(.NOT.LSAME(SIDE,'L')))THEN - INFO=1 - ELSEIF((.NOT.U).AND.(.NOT.LSAME(UPLO,'U')))THEN - INFO=2 - ELSEIF((.NOT.T).AND.(.NOT.LSAME(TRAN,'T')))THEN - INFO=3 - ENDIF - - IF(INFO.NE.0)THEN - CALL XERBLA('DSPMM ',INFO) - RETURN - ENDIF - - !...SWITCH TO PROPER DIMENSION... - !...THE DIMENSION OF C IS AWAYS (DIMA,DIMB)... - IF(S)THEN - DIMA=M - DIMB=N - ELSE - DIMA=N - DIMB=M - ENDIF - - !...QUICK RETURN... - IF(ALPHA.EQ.ZERO)THEN - IF(BETA.EQ.ZERO)THEN - DO 20 J=1,DIMB - DO 10 I=1,DIMA - C(I,J)=ZERO - 10 CONTINUE - 20 CONTINUE - ELSEIF(BETA.NE.ONE)THEN - DO 40 J=1,DIMB - DO 30 I=1,DIMA - C(I,J)=BETA*C(I,J) - 30 CONTINUE - 40 CONTINUE - ENDIF - RETURN - ENDIF - - !...ALPHA.NE.ZERO... - !...CHECK beta*C FIRST... - IF(BETA.EQ.ZERO)THEN - DO 60 J=1,DIMB - DO 50 I=1,DIMA - C(I,J)=ZERO - 50 CONTINUE - 60 CONTINUE - ELSEIF(BETA.NE.ONE)THEN - DO 80 J=1,DIMB - DO 70 I=1,DIMA - C(I,J)=BETA*C(I,J) - 70 CONTINUE - 80 CONTINUE - ENDIF - - !...ASSIGN PROBLEM TYPE ACCORDING TO GIVEN FLAGS... - PTYPE=0000 - IF(S)PTYPE=PTYPE+1000 - IF(U)PTYPE=PTYPE+100 - IF(T)PTYPE=PTYPE+10 - IF(ALPHA.EQ.ONE)PTYPE=PTYPE+1 - - X=1 - - !...U*B - IF(PTYPE==1111)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(J,K) - C(J,K)=C(J,K)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...U*B**T - IF(PTYPE==1101)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(K,J) - C(J,K)=C(J,K)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B*U - IF(PTYPE==0111)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(K,J) - C(K,J)=C(K,J)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B**T*U - IF(PTYPE==0101)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(J,K) - C(K,J)=C(K,J)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...L*B - IF(PTYPE==1011)THEN - DO J=1,M - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(J,K) - C(J,K)=C(J,K)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...L*B**T - IF(PTYPE==1001)THEN - DO J=1,M - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(K,J) - C(J,K)=C(J,K)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B*L - IF(PTYPE==0011)THEN - DO J=1,M - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(K,J) - C(K,J)=C(K,J)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B**T*L - IF(PTYPE==0001)THEN - DO J=1,M - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(J,K) - C(K,J)=C(K,J)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...U*B - IF(PTYPE==1110)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(J,K) - C(J,K)=C(J,K)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...U*B**T - IF(PTYPE==1100)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(K,J) - C(J,K)=C(J,K)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B*U - IF(PTYPE==0110)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(K,J) - C(K,J)=C(K,J)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B**T*U - IF(PTYPE==0100)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(J,K) - C(K,J)=C(K,J)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...L*B - IF(PTYPE==1010)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(J,K) - C(J,K)=C(J,K)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...L*B**T - IF(PTYPE==1000)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(K,J) - C(J,K)=C(J,K)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B*L - IF(PTYPE==0010)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(K,J) - C(K,J)=C(K,J)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B**T*L - IF(PTYPE==0000)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(J,K) - C(K,J)=C(K,J)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - END diff --git a/Toolbox/lapack-ext/sspmm.f b/Toolbox/lapack-ext/sspmm.f deleted file mode 100644 index ecd9a427b..000000000 --- a/Toolbox/lapack-ext/sspmm.f +++ /dev/null @@ -1,458 +0,0 @@ -* Purpose: To compute alpha*op(A,B)+beta*C, op(A,B) could be either -* A*B, -* B*A, -* A*B**T, -* B**T*A. -* -* \param[in] SIDE -* SIDE is CHARACTER*1 -* Select which problem to compute: -* 'L' C=alpha*B*A+beta*C, -* 'R' C=alpha*A*B+beta*C. -* -* \param[in] UPLO -* UPLO is CHARACTER*1 -* Select which part of A is stored: -* 'U' Upper Triangle, -* 'L' Lower Triangle. -* -* \param[in] TRAN -* TRAN is CHARACTER*1 -* Select if B is transverse: -* 'N' No transverse, -* 'T' Transverse. -* -* \param[in] M -* M is INTEGER -* The size of square matrix A. -* -* \param[in] N -* N is INTEGER -* Another dimension of matrix B. -* For SIDE='L', B=>(N,M), -* For SIDE='R', B=>(M,N). -* -* \param[in] ALPHA -* ALPHA is REAL -* The factor. -* -* \param[in] A -* A is REAL(*) array of DIMENSION ((M+1)*M/2) -* -* \param[in] B -* B is REAL(*,*) array of DIMENSION (M,N) or (N,M) -* -* \param[in] LDB -* LDB is INTEGER -* The leading dimension of matrix B, should be at least max(1,M) or max(1,N). -* -* \param[in] BETA -* BETA is REAL -* The factor. -* -* \param[in/out] C -* C is REAL(*,*) array of DIMENSION (M,N) or (N,M) -* -* \param[in] LDC -* LDC is INTEGER -* The leading dimension of matrix C, should be at least max(1,M) or max(1,N) based on SIDE. -* - SUBROUTINE SSPMM(SIDE,UPLO,TRAN,M,N,A,ALPHA,B,LDB,BETA,C,LDC) - - !...INPUT ARGUMENTS... - CHARACTER SIDE,UPLO,TRAN - INTEGER M,N,LDB,LDC - REAL ALPHA,BETA,A(*),B(LDB,*),C(LDC,*) - - !...TEMP VARIABLES... - INTEGER I,J,K,X,Y,Z,DIMA,DIMB,PTYPE,INFO - REAL TEMPA - LOGICAL S,U,T - - !...TWO CONSTANTS... - REAL ZERO,ONE - PARAMETER (ZERO=0.0D+0,ONE=1.0D+0) - - !...EXTERNAL SUBROUTINES... - LOGICAL LSAME - EXTERNAL LSAME - EXTERNAL XERBLA - - !...FLAGS... - S=LSAME(SIDE,'R') - U=LSAME(UPLO,'U') - T=LSAME(TRAN,'N') - - !...CHECK IF ACCEPTABLE ARGUMENTS ARE GIVEN... - INFO=0 - IF((.NOT.S).AND.(.NOT.LSAME(SIDE,'L')))THEN - INFO=1 - ELSEIF((.NOT.U).AND.(.NOT.LSAME(UPLO,'U')))THEN - INFO=2 - ELSEIF((.NOT.T).AND.(.NOT.LSAME(TRAN,'T')))THEN - INFO=3 - ENDIF - - IF(INFO.NE.0)THEN - CALL XERBLA('SSPMM ',INFO) - RETURN - ENDIF - - !...SWITCH TO PROPER DIMENSION... - !...THE DIMENSION OF C IS AWAYS (DIMA,DIMB)... - IF(S)THEN - DIMA=M - DIMB=N - ELSE - DIMA=N - DIMB=M - ENDIF - - !...QUICK RETURN... - IF(ALPHA.EQ.ZERO)THEN - IF(BETA.EQ.ZERO)THEN - DO 20 J=1,DIMB - DO 10 I=1,DIMA - C(I,J)=ZERO - 10 CONTINUE - 20 CONTINUE - ELSEIF(BETA.NE.ONE)THEN - DO 40 J=1,DIMB - DO 30 I=1,DIMA - C(I,J)=BETA*C(I,J) - 30 CONTINUE - 40 CONTINUE - ENDIF - RETURN - ENDIF - - !...ALPHA.NE.ZERO... - !...CHECK beta*C FIRST... - IF(BETA.EQ.ZERO)THEN - DO 60 J=1,DIMB - DO 50 I=1,DIMA - C(I,J)=ZERO - 50 CONTINUE - 60 CONTINUE - ELSEIF(BETA.NE.ONE)THEN - DO 80 J=1,DIMB - DO 70 I=1,DIMA - C(I,J)=BETA*C(I,J) - 70 CONTINUE - 80 CONTINUE - ENDIF - - !...ASSIGN PROBLEM TYPE ACCORDING TO GIVEN FLAGS... - PTYPE=0000 - IF(S)PTYPE=PTYPE+1000 - IF(U)PTYPE=PTYPE+100 - IF(T)PTYPE=PTYPE+10 - IF(ALPHA.EQ.ONE)PTYPE=PTYPE+1 - - X=1 - - !...U*B - IF(PTYPE==1111)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(J,K) - C(J,K)=C(J,K)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...U*B**T - IF(PTYPE==1101)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(K,J) - C(J,K)=C(J,K)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B*U - IF(PTYPE==0111)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(K,J) - C(K,J)=C(K,J)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B**T*U - IF(PTYPE==0101)THEN - DO J=1,M - DO I=1,J-1 - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(J,K) - C(K,J)=C(K,J)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...L*B - IF(PTYPE==1011)THEN - DO J=1,M - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(J,K) - C(J,K)=C(J,K)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...L*B**T - IF(PTYPE==1001)THEN - DO J=1,M - DO K=1,N - C(J,K)=C(J,K)+A(X)*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(I,K)=C(I,K)+A(X)*B(K,J) - C(J,K)=C(J,K)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B*L - IF(PTYPE==0011)THEN - DO J=1,M - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(K,J) - C(K,J)=C(K,J)+A(X)*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B**T*L - IF(PTYPE==0001)THEN - DO J=1,M - DO K=1,N - C(K,J)=C(K,J)+A(X)*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - DO K=1,N - C(K,I)=C(K,I)+A(X)*B(J,K) - C(K,J)=C(K,J)+A(X)*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...U*B - IF(PTYPE==1110)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(J,K) - C(J,K)=C(J,K)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...U*B**T - IF(PTYPE==1100)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(K,J) - C(J,K)=C(J,K)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B*U - IF(PTYPE==0110)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(K,J) - C(K,J)=C(K,J)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(K,J) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...B**T*U - IF(PTYPE==0100)THEN - DO J=1,M - DO I=1,J-1 - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(J,K) - C(K,J)=C(K,J)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(J,K) - ENDDO - X=X+1 - ENDDO - RETURN - ENDIF - - !...L*B - IF(PTYPE==1010)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(J,K) - C(J,K)=C(J,K)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...L*B**T - IF(PTYPE==1000)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(J,K)=C(J,K)+TEMPA*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(I,K)=C(I,K)+TEMPA*B(K,J) - C(J,K)=C(J,K)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B*L - IF(PTYPE==0010)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(K,J) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(K,J) - C(K,J)=C(K,J)+TEMPA*B(K,I) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - !...B**T*L - IF(PTYPE==0000)THEN - DO J=1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,J)=C(K,J)+TEMPA*B(J,K) - ENDDO - X=X+1 - DO I=J+1,M - TEMPA=ALPHA*A(X) - DO K=1,N - C(K,I)=C(K,I)+TEMPA*B(J,K) - C(K,J)=C(K,J)+TEMPA*B(I,K) - ENDDO - X=X+1 - ENDDO - ENDDO - RETURN - ENDIF - - END diff --git a/Toolbox/sync_ostream.h b/Toolbox/sync_ostream.h index cb4112098..95d5e235c 100644 --- a/Toolbox/sync_ostream.h +++ b/Toolbox/sync_ostream.h @@ -23,6 +23,7 @@ class sync_ostream { std::unique_lock print_lock; std::ostream* ostream; + public: explicit sync_ostream(std::ostream&); sync_ostream(const sync_ostream&) = delete; diff --git a/Toolbox/tensorToolbox.h b/Toolbox/tensorToolbox.h index 08858d5ea..52e6d7d98 100644 --- a/Toolbox/tensorToolbox.h +++ b/Toolbox/tensorToolbox.h @@ -87,8 +87,7 @@ namespace tensor { double double_contraction(const vec&, const vec&); double double_contraction(vec&&, vec&&); } // namespace stress - -} // namespace tensor +} // namespace tensor namespace transform { double atan2(const vec&); @@ -175,7 +174,6 @@ namespace transform { namespace triangle { vec to_area_coordinate(const vec&, const mat&); } - } // namespace transform namespace suanpan { diff --git a/Toolbox/utility.h b/Toolbox/utility.h index 0b82cb260..6ffcb4e25 100644 --- a/Toolbox/utility.h +++ b/Toolbox/utility.h @@ -18,17 +18,29 @@ #ifndef UTILITY_H #define UTILITY_H -#include #include +#include +#ifdef __cpp_lib_execution +#include +#endif template void suanpan_for(const IT start, const IT end, F&& FN) { #ifdef SUANPAN_MT - tbb::parallel_for(start, end, std::forward(FN)); + static tbb::affinity_partitioner ap; + tbb::parallel_for(start, end, std::forward(FN), ap); #else for(IT I = start; I < end; ++I) FN(I); #endif } +template constexpr T suanpan_max_element(T start, T end) { +#ifdef __cpp_lib_execution + return std::max_element(std::execution::par, start, end); +#else + return std::max_element(start, end); +#endif +} + namespace suanpan { template [[maybe_unused]] const std::vector& unique(std::vector& container) { std::sort(container.begin(), container.end()); @@ -66,7 +78,7 @@ template bool get_input(istringstream& I, Col& O) { return code; } -template bool get_input(istringstream& I, T& O, U&...R) { return static_cast(I >> O) ? get_input(I, R...) : false; } +template bool get_input(istringstream& I, T& O, U&... R) { return static_cast(I >> O) ? get_input(I, R...) : false; } template T get_input(istringstream& I) { T O; @@ -88,7 +100,7 @@ template bool get_optional_input(istringstream& I, Col& O) { return code; } -template bool get_optional_input(istringstream& I, T& O, U&...R) { +template bool get_optional_input(istringstream& I, T& O, U&... R) { if(I.eof()) return true; return static_cast(I >> O) ? get_optional_input(I, R...) : false; diff --git a/UnitTest/CMakeLists.txt b/UnitTest/CMakeLists.txt index 9539dd640..b3476f909 100644 --- a/UnitTest/CMakeLists.txt +++ b/UnitTest/CMakeLists.txt @@ -1,11 +1,13 @@ target_sources(${PROJECT_NAME} PRIVATE CatchTest.cpp TestColoring.cpp + TestEigen.cpp TestIntegration.cpp TestMatrix.cpp TestMode.cpp TestNURBS.cpp TestQuaternion.cpp + TestSampling.cpp TestShape.cpp TestSolver.cpp TestSorting.cpp @@ -13,5 +15,4 @@ target_sources(${PROJECT_NAME} PRIVATE TestSurfaceNM.cpp TestTensor.cpp TestUtility.cpp - TestSampling.cpp ) diff --git a/UnitTest/CatchTest.cpp b/UnitTest/CatchTest.cpp index 85eff01a3..55f0fdb17 100644 --- a/UnitTest/CatchTest.cpp +++ b/UnitTest/CatchTest.cpp @@ -4,7 +4,6 @@ #include "CatchHeader.h" int catchtest_main(const int argc, char** argv) { - for(auto I = 1; I < argc; ++I) if(constexpr auto t_argv = ""; is_equal(argv[I], "-ctest") || is_equal(argv[I], "--catchtest")) argv[I] = const_cast(t_argv); return Catch::Session().run(argc, argv); diff --git a/UnitTest/TestEigen.cpp b/UnitTest/TestEigen.cpp new file mode 100644 index 000000000..4b8811c7e --- /dev/null +++ b/UnitTest/TestEigen.cpp @@ -0,0 +1,48 @@ +#include +#include "CatchHeader.h" +#include +#include + +TEST_CASE("Eigensolver", "[Utility.Eigen]") { + constexpr auto N = 100; + constexpr auto Q = 6; + + const vec D = regspace(1, 1, N); + + for(auto L = 0; L < N; ++L) { + const mat P = orth(randn(D.n_elem, D.n_elem)); + + mat K = P * diagmat(D) * P.t(); + + mat M = 2. * eye(size(K)); + + auto KK = make_shared>(D.n_elem, D.n_elem); + + for(auto I = 0llu; I < D.n_elem; ++I) for(auto J = 0llu; J < D.n_elem; ++J) KK->at(J, I) = K(J, I); + + auto MM = make_shared>(D.n_elem, D.n_elem); + + for(auto I = 0llu; I < D.n_elem; ++I) MM->at(I, I) = M(I, I); + + vec eigval; + mat eigvec; + + REQUIRE(eig_solve(eigval, eigvec, KK, MM->make_copy(), Q, "SM") == 0); + + for(auto I = 0; I < Q; ++I) + REQUIRE(Approx(eigval(I)) == .5 * I + .5); + + REQUIRE(eig_solve(eigval, eigvec, KK, MM->make_copy(), Q, "LM") == 0); + + for(auto I = 0; I < Q; ++I) + REQUIRE(Approx(eigval(Q - 1 - I)) == .5 * (N - I)); + + cx_vec cx_eigval; + cx_mat cx_eigvec; + + REQUIRE(eig_solve(cx_eigval, cx_eigvec, KK->make_copy(), MM->make_copy(), Q, "LM") == 0); + + for(auto I = 0; I < Q; ++I) + REQUIRE(Approx(cx_eigval(I).real()) == .5 * I + .5); + } +} diff --git a/UnitTest/TestMatrix.cpp b/UnitTest/TestMatrix.cpp index 4fe409a98..4405fc5bf 100644 --- a/UnitTest/TestMatrix.cpp +++ b/UnitTest/TestMatrix.cpp @@ -57,18 +57,18 @@ template void benchmark_mat_solve(string&& title, vec D; BENCHMARK((title + " Full").c_str()) { - clear_mat(); - A.solve(D, C); - REQUIRE(norm(E - D) < static_cast(C.n_elem) * tol); - }; + clear_mat(); + A.solve(D, C); + REQUIRE(norm(E - D) < static_cast(C.n_elem) * tol); + }; A.get_solver_setting().precision = Precision::MIXED; BENCHMARK((title + " Mixed").c_str()) { - clear_mat(); - A.solve(D, C); - REQUIRE(norm(E - D) < static_cast(C.n_elem) * tol); - }; + clear_mat(); + A.solve(D, C); + REQUIRE(norm(E - D) < static_cast(C.n_elem) * tol); + }; } template void benchmark_mat_setup(const int I) { @@ -384,16 +384,16 @@ TEST_CASE("Benchmark Triplet Assembly", "[Matrix.Sparse]") { for(auto J = 2; J != REPEAT; J *= 2) BENCHMARK(string("Assemble " + std::to_string(J)).c_str()) { - triplet_form C(N + REPEAT, N + REPEAT, B.n_elem * REPEAT); + triplet_form C(N + REPEAT, N + REPEAT, B.n_elem * REPEAT); - for(auto I = 0; I < J; ++I) C.assemble(B, I, I, randu()); + for(auto I = 0; I < J; ++I) C.assemble(B, I, I, randu()); - REQUIRE(C.n_elem == NNZ * J); + REQUIRE(C.n_elem == NNZ * J); - C.csc_condense(); + C.csc_condense(); - return C; - }; + return C; + }; } TEST_CASE("Triplet/CSR/CSC Conversion", "[Matrix.Sparse]") { diff --git a/UnitTest/TestUtility.cpp b/UnitTest/TestUtility.cpp index e385fbc22..536e41993 100644 --- a/UnitTest/TestUtility.cpp +++ b/UnitTest/TestUtility.cpp @@ -19,18 +19,18 @@ TEST_CASE("Sign", "[Utility.Sign]") { TEST_CASE("Matrix Allocation", "[Utility.Matrix]") { BENCHMARK("Static Size 20") { - mat::fixed<20, 20> A(fill::randn); - A(10, 10) = 1.; - REQUIRE(A.n_elem == 400); - return A; - }; + mat::fixed<20, 20> A(fill::randn); + A(10, 10) = 1.; + REQUIRE(A.n_elem == 400); + return A; + }; BENCHMARK("Dynamic Size 20") { - mat A(20, 20, fill::randn); - A(10, 10) = 1.; - REQUIRE(A.n_elem == 400); - return A; - }; + mat A(20, 20, fill::randn); + A(10, 10) = 1.; + REQUIRE(A.n_elem == 400); + return A; + }; } TEST_CASE("Sync Stream", "[Utility.Print]") { diff --git a/snapcraft.yaml b/snapcraft.yaml index c2e8e6489..a31871fc1 100644 --- a/snapcraft.yaml +++ b/snapcraft.yaml @@ -9,7 +9,7 @@ issues: https://github.com/TLCFEM/suanPan/issues license: GPL-3.0 source-code: https://github.com/TLCFEM/suanPan summary: An Open Source, Parallel and Heterogeneous Finite Element Analysis Framework -version: "2.6.1" +version: "2.7" website: https://bit.ly/suanpan-doc description: | [**suanPan**](https://github.com/TLCFEM/suanPan) is a finite element method (FEM) simulation platform for applications @@ -57,8 +57,8 @@ parts: override-build: | wget -q https://github.com/TLCFEM/prebuilds/releases/download/latest/VTK-9.2.2-linux.tar.gz tar xf VTK-9.2.2-linux.tar.gz - wget -q https://registrationcenter-download.intel.com/akdlm/irc_nas/18898/l_onemkl_p_2022.2.0.8748_offline.sh - sh ./l_onemkl_p_2022.2.0.8748_offline.sh -a --silent --eula accept + wget -q https://registrationcenter-download.intel.com/akdlm/irc_nas/19138/l_onemkl_p_2023.0.0.25398_offline.sh + sh ./l_onemkl_p_2023.0.0.25398_offline.sh -a --silent --eula accept craftctl default stage-packages: - libgfortran5 diff --git a/suanPan.cpp b/suanPan.cpp index 65dfd57e3..1fb9ed9c1 100644 --- a/suanPan.cpp +++ b/suanPan.cpp @@ -27,6 +27,9 @@ BOOL WIN_EVENT(DWORD) { return TRUE; } // ReSharper disable once CppParameterMayBeConst int main(int argc, char** argv) { #ifdef SUANPAN_WIN +#if defined(SUANPAN_DEBUG) && defined(SUANPAN_MSVC) + _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); +#endif if(!SetConsoleCtrlHandler(WIN_EVENT, TRUE)) return 0; const auto handle = GetStdHandle(STD_OUTPUT_HANDLE); CONSOLE_SCREEN_BUFFER_INFO info;