From 778b2ddaf40d271f5bac20cc9c9d0683210bea96 Mon Sep 17 00:00:00 2001 From: ritukesh Date: Thu, 2 May 2024 17:12:04 +0200 Subject: [PATCH] closes #25, amgcl source files removed --- src/solvers/amgcl/LICENSE.md | 22 - src/solvers/amgcl/README.md | 49 - src/solvers/amgcl/adapter/block_matrix.hpp | 249 --- src/solvers/amgcl/adapter/complex.hpp | 176 --- src/solvers/amgcl/adapter/crs_builder.hpp | 178 --- src/solvers/amgcl/adapter/crs_tuple.hpp | 218 --- src/solvers/amgcl/adapter/eigen.hpp | 125 -- src/solvers/amgcl/adapter/epetra.hpp | 157 -- src/solvers/amgcl/adapter/reorder.hpp | 238 --- src/solvers/amgcl/adapter/scaled_problem.hpp | 166 -- src/solvers/amgcl/adapter/ublas.hpp | 75 - src/solvers/amgcl/adapter/zero_copy.hpp | 98 -- src/solvers/amgcl/amg.hpp | 602 -------- src/solvers/amgcl/backend/blaze.hpp | 332 ---- src/solvers/amgcl/backend/block_crs.hpp | 313 ---- src/solvers/amgcl/backend/builtin.hpp | 1372 ----------------- src/solvers/amgcl/backend/builtin_hybrid.hpp | 69 - src/solvers/amgcl/backend/cuda.hpp | 753 --------- .../backend/detail/default_direct_solver.hpp | 74 - .../amgcl/backend/detail/matrix_ops.hpp | 174 --- src/solvers/amgcl/backend/detail/mixing.hpp | 80 - src/solvers/amgcl/backend/eigen.hpp | 277 ---- src/solvers/amgcl/backend/hpx.hpp | 966 ------------ src/solvers/amgcl/backend/interface.hpp | 449 ------ src/solvers/amgcl/backend/mkl.hpp | 294 ---- src/solvers/amgcl/backend/vexcl.hpp | 493 ------ .../amgcl/backend/vexcl_static_matrix.hpp | 1031 ------------- src/solvers/amgcl/backend/viennacl.hpp | 462 ------ src/solvers/amgcl/coarsening/aggregation.hpp | 164 -- src/solvers/amgcl/coarsening/as_scalar.hpp | 119 -- .../amgcl/coarsening/detail/galerkin.hpp | 53 - .../coarsening/detail/scaled_galerkin.hpp | 59 - .../amgcl/coarsening/plain_aggregates.hpp | 213 --- .../amgcl/coarsening/pointwise_aggregates.hpp | 197 --- .../amgcl/coarsening/rigid_body_modes.hpp | 134 -- src/solvers/amgcl/coarsening/ruge_stuben.hpp | 458 ------ src/solvers/amgcl/coarsening/runtime.hpp | 299 ---- .../amgcl/coarsening/smoothed_aggr_emin.hpp | 363 ----- .../amgcl/coarsening/smoothed_aggregation.hpp | 248 --- .../coarsening/tentative_prolongation.hpp | 233 --- src/solvers/amgcl/deflated_solver.hpp | 276 ---- src/solvers/amgcl/detail/inverse.hpp | 102 -- src/solvers/amgcl/detail/qr.hpp | 601 -------- src/solvers/amgcl/detail/sort_row.hpp | 59 - src/solvers/amgcl/detail/spgemm.hpp | 509 ------ src/solvers/amgcl/io/binary.hpp | 173 --- src/solvers/amgcl/io/ios_saver.hpp | 56 - src/solvers/amgcl/io/mm.hpp | 420 ----- src/solvers/amgcl/make_block_solver.hpp | 81 - src/solvers/amgcl/make_solver.hpp | 230 --- src/solvers/amgcl/mpi/amg.hpp | 499 ------ .../amgcl/mpi/block_preconditioner.hpp | 105 -- .../amgcl/mpi/coarsening/aggregation.hpp | 124 -- src/solvers/amgcl/mpi/coarsening/pmis.hpp | 1122 -------------- src/solvers/amgcl/mpi/coarsening/runtime.hpp | 194 --- .../mpi/coarsening/smoothed_aggregation.hpp | 213 --- src/solvers/amgcl/mpi/cpr.hpp | 472 ------ .../amgcl/mpi/direct_solver/eigen_splu.hpp | 105 -- .../amgcl/mpi/direct_solver/pastix.hpp | 244 --- .../amgcl/mpi/direct_solver/runtime.hpp | 299 ---- .../amgcl/mpi/direct_solver/skyline_lu.hpp | 99 -- .../amgcl/mpi/direct_solver/solver_base.hpp | 272 ---- src/solvers/amgcl/mpi/distributed_matrix.hpp | 1314 ---------------- src/solvers/amgcl/mpi/inner_product.hpp | 72 - src/solvers/amgcl/mpi/make_solver.hpp | 196 --- src/solvers/amgcl/mpi/partition/merge.hpp | 143 -- src/solvers/amgcl/mpi/partition/parmetis.hpp | 214 --- src/solvers/amgcl/mpi/partition/ptscotch.hpp | 216 --- src/solvers/amgcl/mpi/partition/runtime.hpp | 248 --- src/solvers/amgcl/mpi/partition/util.hpp | 324 ---- src/solvers/amgcl/mpi/preconditioner.hpp | 326 ---- .../mpi/relaxation/as_preconditioner.hpp | 105 -- .../amgcl/mpi/relaxation/chebyshev.hpp | 60 - .../amgcl/mpi/relaxation/damped_jacobi.hpp | 60 - .../amgcl/mpi/relaxation/gauss_seidel.hpp | 79 - src/solvers/amgcl/mpi/relaxation/ilu0.hpp | 60 - src/solvers/amgcl/mpi/relaxation/iluk.hpp | 60 - src/solvers/amgcl/mpi/relaxation/ilup.hpp | 60 - src/solvers/amgcl/mpi/relaxation/ilut.hpp | 60 - src/solvers/amgcl/mpi/relaxation/runtime.hpp | 337 ---- src/solvers/amgcl/mpi/relaxation/spai0.hpp | 127 -- src/solvers/amgcl/mpi/relaxation/spai1.hpp | 60 - .../amgcl/mpi/schur_pressure_correction.hpp | 674 -------- src/solvers/amgcl/mpi/solver/bicgstab.hpp | 52 - src/solvers/amgcl/mpi/solver/bicgstabl.hpp | 53 - src/solvers/amgcl/mpi/solver/cg.hpp | 53 - src/solvers/amgcl/mpi/solver/fgmres.hpp | 53 - src/solvers/amgcl/mpi/solver/gmres.hpp | 53 - src/solvers/amgcl/mpi/solver/idrs.hpp | 53 - src/solvers/amgcl/mpi/solver/lgmres.hpp | 53 - src/solvers/amgcl/mpi/solver/preonly.hpp | 52 - src/solvers/amgcl/mpi/solver/richardson.hpp | 53 - src/solvers/amgcl/mpi/solver/runtime.hpp | 53 - src/solvers/amgcl/mpi/subdomain_deflation.hpp | 610 -------- src/solvers/amgcl/mpi/util.hpp | 236 --- src/solvers/amgcl/perf_counter/clock.hpp | 72 - .../amgcl/perf_counter/cray_energy.hpp | 72 - .../amgcl/perf_counter/mpi_aggregator.hpp | 123 -- src/solvers/amgcl/preconditioner/cpr.hpp | 561 ------- src/solvers/amgcl/preconditioner/cpr_drs.hpp | 625 -------- src/solvers/amgcl/preconditioner/dummy.hpp | 105 -- src/solvers/amgcl/preconditioner/runtime.hpp | 423 ----- .../schur_pressure_correction.hpp | 633 -------- src/solvers/amgcl/profiler.hpp | 216 --- src/solvers/amgcl/relaxation/as_block.hpp | 131 -- .../amgcl/relaxation/as_preconditioner.hpp | 125 -- src/solvers/amgcl/relaxation/chebyshev.hpp | 210 --- .../amgcl/relaxation/cusparse_ilu0.hpp | 548 ------- .../amgcl/relaxation/damped_jacobi.hpp | 143 -- .../amgcl/relaxation/detail/ilu_solve.hpp | 471 ------ src/solvers/amgcl/relaxation/gauss_seidel.hpp | 395 ----- src/solvers/amgcl/relaxation/ilu0.hpp | 250 --- src/solvers/amgcl/relaxation/iluk.hpp | 280 ---- src/solvers/amgcl/relaxation/ilup.hpp | 222 --- src/solvers/amgcl/relaxation/ilut.hpp | 386 ----- src/solvers/amgcl/relaxation/runtime.hpp | 383 ----- src/solvers/amgcl/relaxation/spai0.hpp | 122 -- src/solvers/amgcl/relaxation/spai1.hpp | 176 --- src/solvers/amgcl/reorder/cuthill_mckee.hpp | 191 --- src/solvers/amgcl/solver/bicgstab.hpp | 304 ---- src/solvers/amgcl/solver/bicgstabl.hpp | 499 ------ src/solvers/amgcl/solver/cg.hpp | 258 ---- .../solver/detail/default_inner_product.hpp | 61 - .../amgcl/solver/detail/givens_rotations.hpp | 67 - src/solvers/amgcl/solver/eigen.hpp | 108 -- src/solvers/amgcl/solver/fgmres.hpp | 302 ---- src/solvers/amgcl/solver/gmres.hpp | 322 ---- src/solvers/amgcl/solver/idrs.hpp | 493 ------ src/solvers/amgcl/solver/lgmres.hpp | 444 ------ src/solvers/amgcl/solver/precond_side.hpp | 100 -- src/solvers/amgcl/solver/preonly.hpp | 141 -- src/solvers/amgcl/solver/richardson.hpp | 230 --- src/solvers/amgcl/solver/runtime.hpp | 282 ---- src/solvers/amgcl/solver/skyline_lu.hpp | 315 ---- src/solvers/amgcl/util.hpp | 425 ----- src/solvers/amgcl/value_type/complex.hpp | 103 -- src/solvers/amgcl/value_type/eigen.hpp | 184 --- src/solvers/amgcl/value_type/interface.hpp | 205 --- .../amgcl/value_type/static_matrix.hpp | 342 ---- 139 files changed, 36504 deletions(-) delete mode 100644 src/solvers/amgcl/LICENSE.md delete mode 100644 src/solvers/amgcl/README.md delete mode 100644 src/solvers/amgcl/adapter/block_matrix.hpp delete mode 100644 src/solvers/amgcl/adapter/complex.hpp delete mode 100644 src/solvers/amgcl/adapter/crs_builder.hpp delete mode 100644 src/solvers/amgcl/adapter/crs_tuple.hpp delete mode 100644 src/solvers/amgcl/adapter/eigen.hpp delete mode 100644 src/solvers/amgcl/adapter/epetra.hpp delete mode 100644 src/solvers/amgcl/adapter/reorder.hpp delete mode 100644 src/solvers/amgcl/adapter/scaled_problem.hpp delete mode 100644 src/solvers/amgcl/adapter/ublas.hpp delete mode 100644 src/solvers/amgcl/adapter/zero_copy.hpp delete mode 100644 src/solvers/amgcl/amg.hpp delete mode 100644 src/solvers/amgcl/backend/blaze.hpp delete mode 100644 src/solvers/amgcl/backend/block_crs.hpp delete mode 100644 src/solvers/amgcl/backend/builtin.hpp delete mode 100644 src/solvers/amgcl/backend/builtin_hybrid.hpp delete mode 100644 src/solvers/amgcl/backend/cuda.hpp delete mode 100644 src/solvers/amgcl/backend/detail/default_direct_solver.hpp delete mode 100644 src/solvers/amgcl/backend/detail/matrix_ops.hpp delete mode 100644 src/solvers/amgcl/backend/detail/mixing.hpp delete mode 100644 src/solvers/amgcl/backend/eigen.hpp delete mode 100644 src/solvers/amgcl/backend/hpx.hpp delete mode 100644 src/solvers/amgcl/backend/interface.hpp delete mode 100644 src/solvers/amgcl/backend/mkl.hpp delete mode 100644 src/solvers/amgcl/backend/vexcl.hpp delete mode 100644 src/solvers/amgcl/backend/vexcl_static_matrix.hpp delete mode 100644 src/solvers/amgcl/backend/viennacl.hpp delete mode 100644 src/solvers/amgcl/coarsening/aggregation.hpp delete mode 100644 src/solvers/amgcl/coarsening/as_scalar.hpp delete mode 100644 src/solvers/amgcl/coarsening/detail/galerkin.hpp delete mode 100644 src/solvers/amgcl/coarsening/detail/scaled_galerkin.hpp delete mode 100644 src/solvers/amgcl/coarsening/plain_aggregates.hpp delete mode 100644 src/solvers/amgcl/coarsening/pointwise_aggregates.hpp delete mode 100644 src/solvers/amgcl/coarsening/rigid_body_modes.hpp delete mode 100644 src/solvers/amgcl/coarsening/ruge_stuben.hpp delete mode 100644 src/solvers/amgcl/coarsening/runtime.hpp delete mode 100644 src/solvers/amgcl/coarsening/smoothed_aggr_emin.hpp delete mode 100644 src/solvers/amgcl/coarsening/smoothed_aggregation.hpp delete mode 100644 src/solvers/amgcl/coarsening/tentative_prolongation.hpp delete mode 100644 src/solvers/amgcl/deflated_solver.hpp delete mode 100644 src/solvers/amgcl/detail/inverse.hpp delete mode 100644 src/solvers/amgcl/detail/qr.hpp delete mode 100644 src/solvers/amgcl/detail/sort_row.hpp delete mode 100644 src/solvers/amgcl/detail/spgemm.hpp delete mode 100644 src/solvers/amgcl/io/binary.hpp delete mode 100644 src/solvers/amgcl/io/ios_saver.hpp delete mode 100644 src/solvers/amgcl/io/mm.hpp delete mode 100644 src/solvers/amgcl/make_block_solver.hpp delete mode 100644 src/solvers/amgcl/make_solver.hpp delete mode 100644 src/solvers/amgcl/mpi/amg.hpp delete mode 100644 src/solvers/amgcl/mpi/block_preconditioner.hpp delete mode 100644 src/solvers/amgcl/mpi/coarsening/aggregation.hpp delete mode 100644 src/solvers/amgcl/mpi/coarsening/pmis.hpp delete mode 100644 src/solvers/amgcl/mpi/coarsening/runtime.hpp delete mode 100644 src/solvers/amgcl/mpi/coarsening/smoothed_aggregation.hpp delete mode 100644 src/solvers/amgcl/mpi/cpr.hpp delete mode 100644 src/solvers/amgcl/mpi/direct_solver/eigen_splu.hpp delete mode 100644 src/solvers/amgcl/mpi/direct_solver/pastix.hpp delete mode 100644 src/solvers/amgcl/mpi/direct_solver/runtime.hpp delete mode 100644 src/solvers/amgcl/mpi/direct_solver/skyline_lu.hpp delete mode 100644 src/solvers/amgcl/mpi/direct_solver/solver_base.hpp delete mode 100644 src/solvers/amgcl/mpi/distributed_matrix.hpp delete mode 100644 src/solvers/amgcl/mpi/inner_product.hpp delete mode 100644 src/solvers/amgcl/mpi/make_solver.hpp delete mode 100644 src/solvers/amgcl/mpi/partition/merge.hpp delete mode 100644 src/solvers/amgcl/mpi/partition/parmetis.hpp delete mode 100644 src/solvers/amgcl/mpi/partition/ptscotch.hpp delete mode 100644 src/solvers/amgcl/mpi/partition/runtime.hpp delete mode 100644 src/solvers/amgcl/mpi/partition/util.hpp delete mode 100644 src/solvers/amgcl/mpi/preconditioner.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/as_preconditioner.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/chebyshev.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/damped_jacobi.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/gauss_seidel.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/ilu0.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/iluk.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/ilup.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/ilut.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/runtime.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/spai0.hpp delete mode 100644 src/solvers/amgcl/mpi/relaxation/spai1.hpp delete mode 100644 src/solvers/amgcl/mpi/schur_pressure_correction.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/bicgstab.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/bicgstabl.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/cg.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/fgmres.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/gmres.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/idrs.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/lgmres.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/preonly.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/richardson.hpp delete mode 100644 src/solvers/amgcl/mpi/solver/runtime.hpp delete mode 100644 src/solvers/amgcl/mpi/subdomain_deflation.hpp delete mode 100644 src/solvers/amgcl/mpi/util.hpp delete mode 100644 src/solvers/amgcl/perf_counter/clock.hpp delete mode 100644 src/solvers/amgcl/perf_counter/cray_energy.hpp delete mode 100644 src/solvers/amgcl/perf_counter/mpi_aggregator.hpp delete mode 100644 src/solvers/amgcl/preconditioner/cpr.hpp delete mode 100644 src/solvers/amgcl/preconditioner/cpr_drs.hpp delete mode 100644 src/solvers/amgcl/preconditioner/dummy.hpp delete mode 100644 src/solvers/amgcl/preconditioner/runtime.hpp delete mode 100644 src/solvers/amgcl/preconditioner/schur_pressure_correction.hpp delete mode 100644 src/solvers/amgcl/profiler.hpp delete mode 100644 src/solvers/amgcl/relaxation/as_block.hpp delete mode 100644 src/solvers/amgcl/relaxation/as_preconditioner.hpp delete mode 100644 src/solvers/amgcl/relaxation/chebyshev.hpp delete mode 100644 src/solvers/amgcl/relaxation/cusparse_ilu0.hpp delete mode 100644 src/solvers/amgcl/relaxation/damped_jacobi.hpp delete mode 100644 src/solvers/amgcl/relaxation/detail/ilu_solve.hpp delete mode 100644 src/solvers/amgcl/relaxation/gauss_seidel.hpp delete mode 100644 src/solvers/amgcl/relaxation/ilu0.hpp delete mode 100644 src/solvers/amgcl/relaxation/iluk.hpp delete mode 100644 src/solvers/amgcl/relaxation/ilup.hpp delete mode 100644 src/solvers/amgcl/relaxation/ilut.hpp delete mode 100644 src/solvers/amgcl/relaxation/runtime.hpp delete mode 100644 src/solvers/amgcl/relaxation/spai0.hpp delete mode 100644 src/solvers/amgcl/relaxation/spai1.hpp delete mode 100644 src/solvers/amgcl/reorder/cuthill_mckee.hpp delete mode 100644 src/solvers/amgcl/solver/bicgstab.hpp delete mode 100644 src/solvers/amgcl/solver/bicgstabl.hpp delete mode 100644 src/solvers/amgcl/solver/cg.hpp delete mode 100644 src/solvers/amgcl/solver/detail/default_inner_product.hpp delete mode 100644 src/solvers/amgcl/solver/detail/givens_rotations.hpp delete mode 100644 src/solvers/amgcl/solver/eigen.hpp delete mode 100644 src/solvers/amgcl/solver/fgmres.hpp delete mode 100644 src/solvers/amgcl/solver/gmres.hpp delete mode 100644 src/solvers/amgcl/solver/idrs.hpp delete mode 100644 src/solvers/amgcl/solver/lgmres.hpp delete mode 100644 src/solvers/amgcl/solver/precond_side.hpp delete mode 100644 src/solvers/amgcl/solver/preonly.hpp delete mode 100644 src/solvers/amgcl/solver/richardson.hpp delete mode 100644 src/solvers/amgcl/solver/runtime.hpp delete mode 100644 src/solvers/amgcl/solver/skyline_lu.hpp delete mode 100644 src/solvers/amgcl/util.hpp delete mode 100644 src/solvers/amgcl/value_type/complex.hpp delete mode 100644 src/solvers/amgcl/value_type/eigen.hpp delete mode 100644 src/solvers/amgcl/value_type/interface.hpp delete mode 100644 src/solvers/amgcl/value_type/static_matrix.hpp diff --git a/src/solvers/amgcl/LICENSE.md b/src/solvers/amgcl/LICENSE.md deleted file mode 100644 index 7b70f24..0000000 --- a/src/solvers/amgcl/LICENSE.md +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License -=============== - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/src/solvers/amgcl/README.md b/src/solvers/amgcl/README.md deleted file mode 100644 index 7a9f272..0000000 --- a/src/solvers/amgcl/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# AMGCL - -[![Documentation Status](https://readthedocs.org/projects/amgcl/badge/?version=latest)](http://amgcl.readthedocs.io/en/latest/?badge=latest) -[![DOI](https://zenodo.org/badge/6987353.svg)](https://zenodo.org/badge/latestdoi/6987353) -[![Build Status](https://travis-ci.org/ddemidov/amgcl.svg?branch=master)](https://travis-ci.org/ddemidov/amgcl) -[![Build status](https://ci.appveyor.com/api/projects/status/r0s4lbln4qf9r8aq/branch/master?svg=true)](https://ci.appveyor.com/project/ddemidov/amgcl/branch/master) -[![codecov](https://codecov.io/gh/ddemidov/amgcl/branch/master/graph/badge.svg)](https://codecov.io/gh/ddemidov/amgcl) -[![Coverity Scan Build Status](https://scan.coverity.com/projects/5301/badge.svg)](https://scan.coverity.com/projects/5301) - -AMGCL is a header-only C++ library for solving large sparse linear systems with -algebraic multigrid (AMG) method. AMG is one of the most effective iterative -methods for solution of equation systems arising, for example, from -discretizing PDEs on unstructured grids. The method can be used as a black-box -solver for various computational problems, since it does not require any -information about the underlying geometry. AMG is often used not as a -standalone solver but as a preconditioner within an iterative solver (e.g. -Conjugate Gradients, BiCGStab, or GMRES). - -AMGCL builds the AMG hierarchy on a CPU and then transfers it to one of the -provided backends. This allows for transparent acceleration of the solution -phase with help of OpenCL, CUDA, or OpenMP technologies. Users may provide -their own backends which enables tight integration between AMGCL and the user -code. - -See AMGCL documentation at http://amgcl.readthedocs.io/ - -## Referencing - -Demidov, Denis. AMGCL: An efficient, flexible, and extensible algebraic multigrid -implementation. Lobachevskii Journal of Mathematics, 40(5):535–546, May 2019. -[doi](https://doi.org/10.1134/S1995080219050056) [pdf](https://rdcu.be/bHFsY) -[bib](https://raw.githubusercontent.com/ddemidov/amgcl/master/docs/demidov19.bib) - -Demidov, Denis. AMGCL -- A C++ library for efficient solution of large sparse linear -systems. Software Impacts, 6:100037, November 2020. -[doi](https://doi.org/10.1016/j.simpa.2020.100037) -[bib](https://raw.githubusercontent.com/ddemidov/amgcl/master/docs/demidov20.bib) - -Demidov, Denis, Lin Mu, and Bin Wang. Accelerating linear solvers for Stokes -problems with C++ metaprogramming. Journal of Computational Science (2020): -101285. -[doi](https://doi.org/10.1016/j.jocs.2020.101285) -[arxiv](https://arxiv.org/pdf/2006.06052.pdf) -[bib](https://raw.githubusercontent.com/ddemidov/amgcl/master/docs/demidov-mu-wang-20.bib) - -## Support - -* GitHub issues page: https://github.com/ddemidov/amgcl/issues -* Mailing list: https://groups.google.com/forum/#!forum/amgcl diff --git a/src/solvers/amgcl/adapter/block_matrix.hpp b/src/solvers/amgcl/adapter/block_matrix.hpp deleted file mode 100644 index ea7eedd..0000000 --- a/src/solvers/amgcl/adapter/block_matrix.hpp +++ /dev/null @@ -1,249 +0,0 @@ -#ifndef AMGCL_ADAPTER_BLOCK_MATRIX_HPP -#define AMGCL_ADAPTER_BLOCK_MATRIX_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/block_matrix.hpp -\author Denis Demidov -\brief On-the-fly conversion to block valued matrix. -\ingroup adapters -*/ - -#include -#include -#include -#include - -namespace amgcl { -namespace adapter { - -template -struct block_matrix_adapter { - typedef BlockType value_type; - static const int BlockSize = math::static_rows::value; - - const Matrix &A; - - block_matrix_adapter(const Matrix &A) : A(A) { - precondition( - backend::rows(A) % BlockSize == 0 && - backend::cols(A) % BlockSize == 0, - "Matrix size is not divisible by block size!" - ); - } - - size_t rows() const { - return backend::rows(A) / BlockSize; - } - - size_t cols() const { - return backend::cols(A) / BlockSize; - } - - size_t nonzeros() const { - // Just an estimate: - return backend::nonzeros(A) / (BlockSize * BlockSize); - } - - struct row_iterator { - typedef typename backend::row_iterator::type Base; - typedef ptrdiff_t col_type; - typedef BlockType val_type; - - std::array buf; - Base * base; - - bool done; - col_type cur_col; - val_type cur_val; - - row_iterator(const Matrix &A, col_type row) - : base(reinterpret_cast(buf.data())), done(true) - { - for(int i = 0; i < BlockSize; ++i) { - new (base + i) Base(backend::row_begin(A, row * BlockSize + i)); - - if (base[i]) { - col_type col = base[i].col() / BlockSize; - if (done) { - cur_col = col; - done = false; - } else { - cur_col = std::min(cur_col, col); - } - } - } - - if (done) return; - - // While we are gathering the current value, - // base iteratirs are advanced to the next block-column. - cur_val = math::zero(); - col_type end = (cur_col + 1) * BlockSize; - for(int i = 0; i < BlockSize; ++i) { - for(; base[i] && static_cast(base[i].col()) < end; ++base[i]) { - cur_val(i, base[i].col() % BlockSize) = base[i].value(); - } - } - } - - ~row_iterator() { - for(int i = 0; i < BlockSize; ++i) base[i].~Base(); - } - - operator bool() const { - return !done; - } - - row_iterator& operator++() { - // Base iterators are already at the next block-column. - // We just need to gather the current column and value. - done = true; - - col_type end = (cur_col + 1) * BlockSize; - for(int i = 0; i < BlockSize; ++i) { - if (base[i]) { - col_type col = base[i].col() / BlockSize; - if (done) { - cur_col = col; - done = false; - } else { - cur_col = std::min(cur_col, col); - } - } - } - - if (done) return *this; - - cur_val = math::zero(); - end = (cur_col + 1) * BlockSize; - for(int i = 0; i < BlockSize; ++i) { - for(; base[i] && static_cast(base[i].col()) < end; ++base[i]) { - cur_val(i, base[i].col() % BlockSize) = base[i].value(); - } - } - - return *this; - } - - col_type col() const { - return cur_col; - } - - val_type value() const { - return cur_val; - } - }; - - row_iterator row_begin(size_t i) const { - return row_iterator(A, i); - } -}; - -/// Convert scalar-valued matrix to a block-valued one. -template -block_matrix_adapter block_matrix(const Matrix &A) { - return block_matrix_adapter(A); -} - -template -std::shared_ptr< - backend::crs< - typename math::element_of< - typename backend::value_type::type - >::type, - typename backend::col_type::type, - typename backend::ptr_type::type - > - > -unblock_matrix(const Matrix &B) { - typedef typename backend::value_type::type Block; - typedef typename math::element_of::type Scalar; - typedef typename backend::col_type::type Col; - typedef typename backend::ptr_type::type Ptr; - - const int brows = math::static_rows::value; - const int bcols = math::static_cols::value; - - static_assert(brows > 1 || bcols > 1, "Can not unblock scalar matrix!"); - - auto A = std::make_shared>(); - - A->set_size(backend::rows(B) * brows, backend::cols(B) * bcols); - A->ptr[0] = 0; - - const ptrdiff_t nb = backend::rows(B); - -#pragma omp for - for (ptrdiff_t ib = 0; ib < nb; ++ib) { - auto w = backend::row_nonzeros(B, ib); - for (ptrdiff_t i = 0, ia = ib * brows; i < brows; ++i, ++ia) { - A->ptr[ia + 1] = w * bcols; - } - } - - A->scan_row_sizes(); - A->set_nonzeros(); - -#pragma omp for - for (ptrdiff_t ib = 0; ib < nb; ++ib) { - for(auto b = backend::row_begin(B, ib); b; ++b) { - auto c = b.col(); - auto v = b.value(); - - for (ptrdiff_t i = 0, ia = ib * brows; i < brows; ++i, ++ia) { - auto row_head = A->ptr[ia]; - for(int j = 0; j < bcols; ++j) { - A->col[row_head] = c * bcols + j; - A->val[row_head] = v(i,j); - ++row_head; - } - A->ptr[ia] = row_head; - } - } - } - - std::rotate(A->ptr, A->ptr + A->nrows, A->ptr + A->nrows + 1); - A->ptr[0] = 0; - - return A; -} - -} // namespace adapter - -namespace backend { -namespace detail { - -template -struct use_builtin_matrix_ops< adapter::block_matrix_adapter > - : std::true_type -{}; - -} // namespace detail -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/adapter/complex.hpp b/src/solvers/amgcl/adapter/complex.hpp deleted file mode 100644 index dbbacfa..0000000 --- a/src/solvers/amgcl/adapter/complex.hpp +++ /dev/null @@ -1,176 +0,0 @@ -#ifndef AMGCL_ADAPTER_COMPLEX_HPP -#define AMGCL_ADAPTER_COMPLEX_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/complex.hpp -\author Denis Demidov -\brief Complex-valued matrix adapter. -\ingroup adapters -*/ - -#include -#include - -#include -#include - -namespace amgcl { -namespace adapter { - -template -struct complex_adapter { - static_assert(is_complex::type>::value, - "value type should be complex"); - - typedef typename backend::value_type::type::value_type value_type; - - const Matrix &A; - - complex_adapter(const Matrix &A) : A(A) {} - - size_t rows() const { - return 2 * backend::rows(A); - } - - size_t cols() const { - return 2 * backend::cols(A); - } - - size_t nonzeros() const { - return 4 * backend::nonzeros(A); - } - - struct row_iterator { - typedef typename backend::row_iterator::type Base; - typedef typename Base::col_type col_type; - - row_iterator(const Base &base, bool row_real) - : base(base), row_real(row_real), col_real(true) {} - - operator bool() const { - return static_cast(base); - } - - row_iterator& operator++() { - col_real = !col_real; - if (col_real) ++base; - - return *this; - } - - col_type col() const { - if (col_real) - return base.col() * 2; - else - return base.col() * 2 + 1; - } - - value_type value() const { - if (row_real) { - if (col_real) - return std::real(base.value()); - else - return -std::imag(base.value()); - } else { - if (col_real) - return std::imag(base.value()); - else - return std::real(base.value()); - } - } - - private: - Base base; - bool row_real; - bool col_real; - - }; - - row_iterator row_begin(size_t i) const { - return row_iterator(backend::row_begin(A, i / 2), i % 2 == 0); - } -}; - -template -complex_adapter complex_matrix(const Matrix &A) { - return complex_adapter(A); -} - -template -boost::iterator_range< - typename std::add_pointer< - typename std::conditional< - std::is_const::value, - typename std::add_const< - typename boost::range_value< - typename std::decay::type - >::type::value_type - >::type, - typename boost::range_value< - typename std::decay::type - >::type::value_type - >::type - >::type - > -complex_range(Range &rng) { - typedef - typename std::add_pointer< - typename std::conditional< - std::is_const::value, - typename std::add_const< - typename boost::range_value< - typename std::decay::type - >::type::value_type - >::type, - typename boost::range_value< - typename std::decay::type - >::type::value_type - >::type - >::type - pointer_type; - - pointer_type b = reinterpret_cast(&rng[0]); - pointer_type e = b + 2 * boost::size(rng); - - return boost::iterator_range(b, e); -} - -} // namespace adapter - -namespace backend { -namespace detail { - -template -struct use_builtin_matrix_ops< amgcl::adapter::complex_adapter > - : std::true_type -{}; - -} // namespace detail -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/adapter/crs_builder.hpp b/src/solvers/amgcl/adapter/crs_builder.hpp deleted file mode 100644 index 0065aad..0000000 --- a/src/solvers/amgcl/adapter/crs_builder.hpp +++ /dev/null @@ -1,178 +0,0 @@ -#ifndef AMGCL_ADAPTER_CRS_BUILDER_HPP -#define AMGCL_ADAPTER_CRS_BUILDER_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/crs_builder.hpp -\author Denis Demidov -\brief Matrix builder that creates matrix rows as needed. -\ingroup adapters - -Example: -\code -struct poisson_2d { - typedef double val_type; - typedef long col_type; - - poisson_2d(size_t n) : n(n), h2i((n - 1) * (n - 1)) {} - - // Number of rows in the constructed matrix: - size_t rows() const { return n * n; } - - // Estimated number of nonzeros in the problem: - size_t nonzeros() const { return 5 * rows(); } - - // Fills column numbers and values of nonzero elements in the given matrix row. - void operator()(size_t row, - std::vector &col, - std::vector &val - ) const - { - size_t i = row % n; - size_t j = row / n; - - if (j > 0) { - col.push_back(row - n); - val.push_back(-h2i); - } - - if (i > 0) { - col.push_back(row - 1); - val.push_back(-h2i); - } - - col.push_back(row); - val.push_back(4 * h2i); - - if (i + 1 < n) { - col.push_back(row + 1); - val.push_back(-h2i); - } - - if (j + 1 < n) { - col.push_back(row + n); - val.push_back(-h2i); - } - } - - private: - size_t n; - double h2i; -}; - -amgcl::make_solver< - Backend, Coarsening, Relaxation, IterativeSolver - > solve( amgcl::backend::make_matrix( poisson_2d(m) ) ); -\endcode -*/ - -#include -#include - -namespace amgcl { - -/// Matrix adapters. -namespace adapter { - -/// Generates matrix rows as needed with help of user-provided functor. -/** - * The generated rows are not stored anywhere. - */ -template -struct matrix_builder { - typedef typename RowBuilder::val_type value_type; - typedef typename RowBuilder::col_type col_type; - - RowBuilder build_row; - - matrix_builder(const RowBuilder &row_builder) : build_row(row_builder) {} - - size_t rows() const { return build_row.rows(); } - size_t cols() const { return build_row.rows(); } - size_t nonzeros() const { return build_row.nonzeros(); } - - struct row_iterator { - typedef typename RowBuilder::col_type col_type; - typedef typename RowBuilder::val_type val_type; - - typedef typename std::vector::const_iterator col_iterator; - typedef typename std::vector::const_iterator val_iterator; - - row_iterator(const RowBuilder &build_row, size_t i) : ptr(0) - { - build_row(i, m_col, m_val); - } - - operator bool() const { - return m_col.size() - ptr; - } - - row_iterator& operator++() { - ++ptr; - return *this; - } - - col_type col() const { - return m_col[ptr]; - } - - val_type value() const { - return m_val[ptr]; - } - - private: - int ptr; - std::vector m_col; - std::vector m_val; - }; - - row_iterator row_begin(size_t i) const { - return row_iterator(build_row, i); - } - -}; - -/// Convenience function returning an instance of matrix_builder -template -matrix_builder make_matrix(const RowBuilder &row_builder) { - return matrix_builder(row_builder); -} - -} // namespace adapter - -namespace backend { -namespace detail { - -template -struct use_builtin_matrix_ops< amgcl::adapter::matrix_builder > - : std::true_type -{}; - -} // namespace detail -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/adapter/crs_tuple.hpp b/src/solvers/amgcl/adapter/crs_tuple.hpp deleted file mode 100644 index db623ca..0000000 --- a/src/solvers/amgcl/adapter/crs_tuple.hpp +++ /dev/null @@ -1,218 +0,0 @@ -#ifndef AMGCL_ADAPTER_CRS_TUPLE_HPP -#define AMGCL_ADAPTER_CRS_TUPLE_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/crs_tuple.hpp -\author Denis Demidov -\brief Specify sparse matrix as a tuple of CRS arrays. -\ingroup adapters - -Example: -\code -// Adapt STL containers: -std::vector ptr; -std::vector col; -std::vector val; - -AMG amg( std::tie(n, ptr, col, val) ); - -// Adapt raw arrays: -int *ptr; -int *col; -double *val; - -AMG amg(std::make_tuple(n, - amgcl::make_iterator_range(ptr, ptr + n + 1), - amgcl::make_iterator_range(col, col + ptr[n]), - amgcl::make_iterator_range(val, val + ptr[n]) - ) ); -\endcode -*/ - -/** - * \defgroup adapters Matrix adapters - * \brief Adapters for variuos sparse matrix formats. - */ - -#include -#include -#include -#include - -#include -#include -#include - -namespace amgcl { -namespace backend { - -//--------------------------------------------------------------------------- -// Specialization of matrix interface -//--------------------------------------------------------------------------- -template < typename N, typename PRng, typename CRng, typename VRng > -struct value_type< std::tuple > -{ - typedef typename std::decay()[0])>::type type; -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct rows_impl< std::tuple > -{ - static size_t get(const std::tuple &A) { - return std::get<0>(A); - } -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct cols_impl< std::tuple > -{ - static size_t get(const std::tuple &A) { - return std::get<0>(A); - } -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct nonzeros_impl< std::tuple > -{ - static size_t get(const std::tuple &A) { - return std::get<1>(A)[std::get<0>(A)]; - } -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct row_iterator< std::tuple > -{ - class type { - public: - typedef typename std::decay()[0])>::type col_type; - typedef typename std::decay()[0])>::type val_type; - - type(const std::tuple &A, size_t row) - : m_col(std::begin(std::get<2>(A))) - , m_end(std::begin(std::get<2>(A))) - , m_val(std::begin(std::get<3>(A))) - { - typedef typename std::decay()[0])>::type ptr_type; - - ptr_type row_begin = std::get<1>(A)[row]; - ptr_type row_end = std::get<1>(A)[row + 1]; - - m_col += row_begin; - m_end += row_end; - m_val += row_begin; - } - - operator bool() const { - return m_col != m_end; - } - - type& operator++() { - ++m_col; - ++m_val; - return *this; - } - - col_type col() const { - return *m_col; - } - - val_type value() const { - return *m_val; - } - - private: - typedef decltype(std::begin(std::declval())) val_iterator; - typedef decltype(std::begin(std::declval())) col_iterator; - - col_iterator m_col; - col_iterator m_end; - val_iterator m_val; - }; -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct row_begin_impl< std::tuple > -{ - typedef std::tuple Matrix; - static typename row_iterator::type - get(const Matrix &matrix, size_t row) { - return typename row_iterator::type(matrix, row); - } -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct row_nonzeros_impl< std::tuple > { - typedef std::tuple Matrix; - - static size_t get(const Matrix &A, size_t row) { - return std::get<1>(A)[row + 1] - std::get<1>(A)[row]; - } -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct ptr_data_impl< std::tuple > { - typedef std::tuple Matrix; - typedef typename std::decay()[0])>::type ptr_type; - typedef const ptr_type* type; - static type get(const Matrix &A) { - return &std::get<1>(A)[0]; - } -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct col_data_impl< std::tuple > { - typedef std::tuple Matrix; - typedef typename std::decay()[0])>::type col_type; - typedef const col_type* type; - static type get(const Matrix &A) { - return &std::get<2>(A)[0]; - } -}; - -template < typename N, typename PRng, typename CRng, typename VRng > -struct val_data_impl< std::tuple > { - typedef std::tuple Matrix; - typedef typename std::decay()[0])>::type val_type; - typedef const val_type* type; - static type get(const Matrix &A) { - return &std::get<3>(A)[0]; - } -}; - -namespace detail { - -template < typename N, typename PRng, typename CRng, typename VRng > -struct use_builtin_matrix_ops< std::tuple > - : std::true_type -{}; - -} // namespace detail - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/adapter/eigen.hpp b/src/solvers/amgcl/adapter/eigen.hpp deleted file mode 100644 index 37a2947..0000000 --- a/src/solvers/amgcl/adapter/eigen.hpp +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef AMGCL_ADAPTER_EIGEN_HPP -#define AMGCL_ADAPTER_EIGEN_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/eigen.hpp -\author Denis Demidov -\brief Adapters for Eigen types to be used with builtin backend. -\ingroup adapters -*/ - -#include -#include -#include -#include - -namespace amgcl { -namespace backend { - -//--------------------------------------------------------------------------- -// Backend interface specialization for Eigen types -//--------------------------------------------------------------------------- -template -struct is_eigen_sparse_matrix : std::false_type {}; - -template -struct is_eigen_type : std::false_type {}; - -template -struct is_eigen_sparse_matrix< - Eigen::Map> - > : std::true_type -{}; - -template -struct is_eigen_sparse_matrix< - Eigen::SparseMatrix - > : std::true_type -{}; - -template -struct is_eigen_type< - T, - typename std::enable_if< - std::is_arithmetic::value && - std::is_base_of, T>::value - >::type - > : std::true_type -{}; - -template -struct value_type< - T, - typename std::enable_if::value>::type - > -{ - typedef typename T::Scalar type; -}; - -template -struct nonzeros_impl< - T, - typename std::enable_if::value>::type - > -{ - static size_t get(const T &matrix) { - return matrix.nonZeros(); - } -}; - -template -struct row_iterator < - T, - typename std::enable_if::value>::type - > -{ - typedef typename T::InnerIterator type; -}; - -template -struct row_begin_impl < - T, - typename std::enable_if::value>::type - > -{ - typedef typename row_iterator::type iterator; - static iterator get(const T &matrix, size_t row) { - return iterator(matrix, row); - } -}; - -} // namespace backend -} // namespace amgcl - -#define AMGCL_USE_EIGEN_VECTORS_WITH_BUILTIN_BACKEND() \ - namespace amgcl { namespace backend { \ - template \ - struct is_builtin_vector< Eigen::Matrix > \ - : std::true_type {}; \ - } } - -#endif diff --git a/src/solvers/amgcl/adapter/epetra.hpp b/src/solvers/amgcl/adapter/epetra.hpp deleted file mode 100644 index 50260de..0000000 --- a/src/solvers/amgcl/adapter/epetra.hpp +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef AMGCL_ADAPTER_EPETRA_HPP -#define AMGCL_ADAPTER_EPETRA_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov -Copyright (c) 2014, Riccardo Rossi, CIMNE (International Center for Numerical Methods in Engineering) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/epetra.hpp -\author Denis Demidov -\brief Adapt Epetra_CrsMatrix from Trilinos. -\ingroup adapters -*/ - -#include - -#include -#include -#include -#include - -#include -#include - -namespace amgcl { -namespace adapter { - -/// Adapts Epetra_CrsMatrix -class epetra_map { - public: - typedef double value_type; - - epetra_map(const Epetra_CrsMatrix &A) - : A(A), order(A.ColMap()) - { - const Epetra_Map& row_map = A.RowMap(); - const Epetra_Map& col_map = A.ColMap(); - - int entries_before; - int local_entries = row_map.NumMyElements(); - A.Comm().ScanSum(&local_entries, &entries_before, 1); - entries_before -= local_entries; - - Epetra_IntVector perm(row_map); - for(int i = 0, j = entries_before; i < local_entries; ++i, ++j) - perm[i] = j; - - Epetra_Import importer = Epetra_Import(col_map, row_map); - - order.Import(perm, importer, Insert); - } - - size_t rows() const { - return A.NumMyRows(); - } - - size_t cols() const { - return A.NumGlobalCols(); - } - - size_t nonzeros() const { - return A.NumMyNonzeros(); - } - - class row_iterator { - public: - typedef int col_type; - typedef double val_type; - - row_iterator( - const Epetra_CrsMatrix &A, - const Epetra_IntVector &order, - int row - ) - { - int nnz; - A.ExtractMyRowView(row, nnz, m_val, m_col); - m_end = m_col + nnz; - - col_copy.assign(m_col, m_col + nnz); - val_copy.assign(m_val, m_val + nnz); - - for(auto &c : col_copy) c = order[c]; - - m_col = &col_copy[0]; - m_end = m_col + nnz; - m_val = &val_copy[0]; - - amgcl::detail::sort_row(m_col, m_val, nnz); - } - - operator bool() const { - return m_col != m_end; - } - - row_iterator& operator++() { - ++m_col; - ++m_val; - return *this; - } - - col_type col() const { - return *m_col; - } - - val_type value() const { - return *m_val; - } - - private: - col_type * m_col; - col_type * m_end; - val_type * m_val; - - std::vector col_copy; - std::vector val_copy; - }; - - row_iterator row_begin(int row) const { - return row_iterator(A, order, row); - } - private: - const Epetra_CrsMatrix &A; - Epetra_IntVector order; -}; - -/// Adapts Epetra_CrsMatrix -inline epetra_map map(const Epetra_CrsMatrix &A) { - return epetra_map(A); -} - -} // namespace adapter -} // namespace amgcl - - -#endif diff --git a/src/solvers/amgcl/adapter/reorder.hpp b/src/solvers/amgcl/adapter/reorder.hpp deleted file mode 100644 index 8649d3a..0000000 --- a/src/solvers/amgcl/adapter/reorder.hpp +++ /dev/null @@ -1,238 +0,0 @@ -#ifndef AMGCL_ADAPTER_REORDER_HPP -#define AMGCL_ADAPTER_REORDER_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/reorder.hpp -\author Denis Demidov -\brief On-the-fly reodering of matrix and vectors. -\ingroup adapters -*/ - -#include -#include -#include - - -#include -#include -#include - -namespace amgcl { -namespace adapter { - -template -struct reordered_matrix { - typedef typename backend::value_type::type value_type; - typedef typename backend::row_iterator::type base_iterator; - - const Matrix &A; - const ptrdiff_t * perm; - const ptrdiff_t * iperm; - - reordered_matrix(const Matrix &A, const ptrdiff_t *perm, const ptrdiff_t * iperm) - : A(A), perm(perm), iperm(iperm) - {} - - size_t rows() const { - return backend::rows(A); - } - - size_t cols() const { - return backend::cols(A); - } - - size_t nonzeros() const { - return backend::nonzeros(A); - } - - struct row_iterator { - base_iterator base; - const ptrdiff_t * iperm; - - row_iterator(const base_iterator &base, const ptrdiff_t *iperm) - : base(base), iperm(iperm) - {} - - operator bool() const { - return base; - } - - row_iterator& operator++() { - ++base; - return *this; - } - - ptrdiff_t col() const { - return iperm[base.col()]; - } - - value_type value() const { - return base.value(); - } - }; - - row_iterator row_begin(size_t i) const { - return row_iterator(backend::row_begin(A, perm[i]), iperm); - } -}; - -template -struct reordered_vector { - typedef typename backend::value_type::type>::type raw_value_type; - typedef typename std::conditional< - std::is_const::value, - const raw_value_type, - raw_value_type - >::type value_type; - - Vector &x; - const ptrdiff_t *perm; - - reordered_vector(Vector &x, const ptrdiff_t *perm) : x(x), perm(perm) {} - - size_t size() const { - return boost::size(x); - } - - value_type& operator[](size_t i) const { - return x[perm[i]]; - } - - boost::permutation_iterator< - typename std::decay::type::iterator, - const ptrdiff_t* - > - begin() { - return boost::make_permutation_iterator(boost::begin(x), perm); - } - - boost::permutation_iterator< - typename std::decay::type::const_iterator, - const ptrdiff_t* - > - begin() const { - return boost::make_permutation_iterator(boost::begin(x), perm); - } - - boost::permutation_iterator< - typename std::decay::type::iterator, - const ptrdiff_t* - > - end() { - return boost::make_permutation_iterator(boost::end(x), perm + size()); - } - - boost::permutation_iterator< - typename std::decay::type::const_iterator, - const ptrdiff_t* - > - end() const { - return boost::make_permutation_iterator(boost::end(x), perm + size()); - } -}; - -} // namespace adapter - -namespace backend { -namespace detail { - -template -struct use_builtin_matrix_ops< adapter::reordered_matrix > - : std::true_type -{}; - -} // namespace detail - - -template -struct is_builtin_vector< adapter::reordered_vector > - : is_builtin_vector::type> -{}; - -} // namespace backend - -namespace adapter { - -template > -class reorder { - public: - template - reorder(const Matrix &A) : n(backend::rows(A)), perm(n), iperm(n) - { - ordering::get(A, perm); -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) iperm[perm[i]] = i; - } - - template - typename std::enable_if< - !backend::is_builtin_vector::value, - reordered_matrix - >::type - operator()(const Matrix &A) const { - return reordered_matrix(A, perm.data(), iperm.data()); - } - - template - typename std::enable_if< - backend::is_builtin_vector::value, - reordered_vector - >::type - operator()(Vector &x) const { - return reordered_vector(x, perm.data()); - } - - template - typename std::enable_if< - backend::is_builtin_vector::value, - reordered_vector - >::type - operator()(const Vector &x) const { - return reordered_vector(x, perm.data()); - } - - template - void forward(const Vector1 &x, Vector2 &y) const { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) y[i] = x[perm[i]]; - } - - template - void inverse(const Vector1 &x, Vector2 &y) const { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) y[perm[i]] = x[i]; - } - - private: - ptrdiff_t n; - backend::numa_vector perm, iperm; -}; - -} // namespace adapter -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/adapter/scaled_problem.hpp b/src/solvers/amgcl/adapter/scaled_problem.hpp deleted file mode 100644 index 2566d1c..0000000 --- a/src/solvers/amgcl/adapter/scaled_problem.hpp +++ /dev/null @@ -1,166 +0,0 @@ -#ifndef AMGCL_ADAPTER_SCALED_PROBLEM_HPP -#define AMGCL_ADAPTER_SCALED_PROBLEM_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/scaled_problem.hpp -\author Denis Demidov -\brief Scale matrix, rhs, and solution. - -Example: -\code -auto A = std::tie(rows, ptr, col, val); -auto scale = amgcl::adapter::scale_diagonal(A, bprm); - -// Setup solver -Solver solve(scale.matrix(A), prm, bprm); - -// option 1: rhs is untouched -solve(*scale.rhs(b), x); - -// option 2: rhs is prescaled in-place -scale(b); -solve(b, x); - -// postprocess the solution: -scale(x); -\endcode -*/ - -#include -#include -#include - -namespace amgcl { -namespace adapter { - -template -struct scaled_matrix { - typedef typename backend::value_type::type value_type; - typedef typename backend::value_type::type scale_type; - - const Matrix &A; - const Scale &s; - - scaled_matrix(const Matrix &A, const Scale &s) : A(A), s(s) {} - - size_t rows() const { return backend::rows(A); } - size_t cols() const { return backend::cols(A); } - size_t nonzeros() const { return backend::nonzeros(A); } - - struct row_iterator : public backend::row_iterator::type { - typedef typename backend::row_iterator::type Base; - - scale_type si; - const Scale &s; - - row_iterator(const Matrix &A, const Scale &s, size_t i) - : Base(A, i), si(s[i]), s(s) {} - - value_type value() const { - return si * static_cast(this)->value() * s[this->col()]; - } - }; - - row_iterator row_begin(size_t i) const { - return row_iterator(A, s, i); - } -}; - -template -struct scaled_problem { - typedef typename Backend::params backend_params; - - const std::shared_ptr s; - const backend_params &bprm; - - scaled_problem(std::shared_ptr s, const backend_params &bprm = backend_params()) - : s(s), bprm(bprm) {} - - template - scaled_matrix matrix(const Matrix &A) const { - return scaled_matrix(A, *s); - } - - template - std::shared_ptr rhs(const Vector &v) const { - auto t = Backend::copy_vector(v, bprm); - (*this)(*t); - return t; - } - - template - void operator()(Vector &x) const { - typedef typename backend::value_type::type value_type; - typedef typename math::scalar_of::type scalar_type; - - const auto one = math::identity(); - const auto zero = math::zero(); - - if (backend::is_builtin_vector::value) { - backend::vmul(one, *s, x, zero, x); - } else { - backend::vmul(one, *Backend::copy_vector(*s, bprm), x, zero, x); - } - } -}; - -template -scaled_problem< - Backend, - std::vector< - typename math::scalar_of< - typename backend::value_type::type - >::type> - > -scale_diagonal( - const Matrix &A, - const typename Backend::params &bprm = typename Backend::params() - ) -{ - typedef typename backend::value_type::type value_type; - typedef typename math::scalar_of::type scalar_type; - ptrdiff_t n = backend::rows(A); - auto s = std::make_shared>(n); - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) { - for(auto a = backend::row_begin(A, i); a; ++a) { - if (a.col() == i) { - (*s)[i] = math::inverse(sqrt(math::norm(a.value()))); - break; - } - } - } - - return scaled_problem>(s, bprm); -} - -} // namespace adapter - -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/adapter/ublas.hpp b/src/solvers/amgcl/adapter/ublas.hpp deleted file mode 100644 index d2fcd25..0000000 --- a/src/solvers/amgcl/adapter/ublas.hpp +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef AMGCL_ADAPTER_UBLAS_HPP -#define AMGCL_ADAPTER_UBLAS_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov -Copyright (c) 2014, Riccardo Rossi, CIMNE (International Center for Numerical Methods in Engineering) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/ublas.hpp -\author Denis Demidov -\brief Adapters for Boost.uBlas matrices and vectors. -\ingroup adapters -*/ - -#include -#include -#include -#include - -namespace amgcl { -namespace backend { - -// Make builtin backend recognize ublas vectors as its own: -template -struct is_builtin_vector< boost::numeric::ublas::vector > - : std::true_type {}; - -/// Adapts Boost.uBlas matrix. -template -std::tuple< - size_t, - iterator_range, - iterator_range, - iterator_range - > -map(const boost::numeric::ublas::compressed_matrix &A) { - return std::make_tuple( - A.size1(), - make_iterator_range( - A.index1_data().begin(), A.index1_data().end() - ), - make_iterator_range( - A.index2_data().begin(), A.index2_data().end() - ), - make_iterator_range( - A.value_data().begin(), A.value_data().end() - ) - ); -} - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/adapter/zero_copy.hpp b/src/solvers/amgcl/adapter/zero_copy.hpp deleted file mode 100644 index b28736d..0000000 --- a/src/solvers/amgcl/adapter/zero_copy.hpp +++ /dev/null @@ -1,98 +0,0 @@ -#ifndef AMGCL_ADAPTER_ZERO_COPY_HPP -#define AMGCL_ADAPTER_ZERO_COPY_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -\file amgcl/adapter/zero_copy.hpp -\author Denis Demidov -\brief Zero-copy adapter for input matrix in CRS format. -\ingroup adapters -*/ - -#include - -#include -#include - -namespace amgcl { -namespace adapter { - -template -std::shared_ptr< backend::crs > -zero_copy(size_t nrows, size_t ncols, const Ptr *ptr, const Col *col, const Val *val) { - // Check that Ptr and Col types are binary-compatible with ptrdiff_t: - static_assert(std::is_integral::value, "Unsupported Ptr type"); - static_assert(std::is_integral::value, "Unsupported Col type"); - static_assert(sizeof(Ptr) == sizeof(ptrdiff_t), "Unsupported Ptr type"); - static_assert(sizeof(Col) == sizeof(ptrdiff_t), "Unsupported Col type"); - - auto A = std::make_shared< backend::crs >(); - A->nrows = nrows; - A->ncols = ncols; - A->nnz = nrows ? ptr[nrows] : 0; - - A->ptr = (ptrdiff_t*)ptr; - A->col = (ptrdiff_t*)col; - A->val = (Val*)val; - - A->own_data = false; - - return A; -} - -template -std::shared_ptr< backend::crs > -zero_copy(size_t n, const Ptr *ptr, const Col *col, const Val *val) { - return zero_copy(n, n, ptr, col, val); -} - -template -std::shared_ptr< backend::crs > -zero_copy_direct(size_t nrows, size_t ncols, const Ptr *ptr, const Col *col, const Val *val) { - auto A = std::make_shared< backend::crs >(); - A->nrows = nrows; - A->ncols = ncols; - A->nnz = nrows ? ptr[nrows] : 0; - - A->ptr = const_cast(ptr); - A->col = const_cast(col); - A->val = const_cast(val); - - A->own_data = false; - - return A; -} - -template -std::shared_ptr< backend::crs > -zero_copy_direct(size_t n, const Ptr *ptr, const Col *col, const Val *val) { - return zero_copy_direct(n, n, ptr, col, val); -} - -} // namespace adapter -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/amg.hpp b/src/solvers/amgcl/amg.hpp deleted file mode 100644 index e58a5f5..0000000 --- a/src/solvers/amgcl/amg.hpp +++ /dev/null @@ -1,602 +0,0 @@ -#ifndef AMGCL_AMG_HPP -#define AMGCL_AMG_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/amg.hpp - * \author Denis Demidov - * \brief An AMG preconditioner. - */ - -#include -#include -#include -#include - -#include -#include -#include - -/// Primary namespace. -namespace amgcl { - -/// Algebraic multigrid method. -/** - * AMG is one the most effective methods for solution of large sparse - * unstructured systems of equations, arising, for example, from discretization - * of PDEs on unstructured grids \cite Trottenberg2001. The method can be used - * as a black-box solver for various computational problems, since it does not - * require any information about the underlying geometry. - * - * The three template parameters allow the user to select the exact components - * of the method: - * 1. *Backend* to transfer the constructed hierarchy to, - * 2. *Coarsening* strategy for hierarchy construction, and - * 3. *Relaxation* scheme (smoother to use during the solution phase). - * - * Instance of the class builds the AMG hierarchy for the given system matrix - * and is intended to be used as a preconditioner. - */ -template < - class Backend, - template class Coarsening, - template class Relax - > -class amg { - public: - typedef Backend backend_type; - - typedef typename Backend::value_type value_type; - typedef typename Backend::col_type col_type; - typedef typename Backend::ptr_type ptr_type; - typedef typename Backend::matrix matrix; - typedef typename Backend::vector vector; - - typedef Coarsening coarsening_type; - typedef Relax relax_type; - - typedef typename backend::builtin::matrix build_matrix; - - typedef typename math::scalar_of::type scalar_type; - - /// Backend parameters. - typedef typename Backend::params backend_params; - - /// Parameters of the method. - /** - * The amgcl::amg::params struct includes parameters for each - * component of the method as well as some universal parameters. - */ - struct params { - typedef typename coarsening_type::params coarsening_params; - typedef typename relax_type::params relax_params; - - coarsening_params coarsening; ///< Coarsening parameters. - relax_params relax; ///< Relaxation parameters. - - /// Specifies when level is coarse enough to be solved directly. - /** - * If number of variables at a next level in the hierarchy becomes - * lower than this threshold, then the hierarchy construction is - * stopped and the linear system is solved directly at this level. - */ - unsigned coarse_enough; - - /// Use direct solver at the coarsest level. - /** - * When set, the coarsest level is solved with a direct solver. - * Otherwise a smoother is used as a solver. - */ - bool direct_coarse; - - /// Maximum number of levels. - /** If this number is reached while the size of the last level is - * greater that `coarse_enough`, then the coarsest level will not - * be solved exactly, but will use a smoother. - */ - unsigned max_levels; - - /// Number of pre-relaxations. - unsigned npre; - - /// Number of post-relaxations. - unsigned npost; - - /// Number of cycles (1 for V-cycle, 2 for W-cycle, etc.). - unsigned ncycle; - - /// Number of cycles to make as part of preconditioning. - unsigned pre_cycles; - - /// Keep matrices in internal format to allow for quick rebuild of the hierarchy - bool allow_rebuild; - - params() : - coarse_enough( Backend::direct_solver::coarse_enough() ), - direct_coarse(true), - max_levels( std::numeric_limits::max() ), - npre(1), npost(1), ncycle(1), pre_cycles(1), - allow_rebuild(std::is_same::value) - {} - -#ifndef AMGCL_NO_BOOST - params(const boost::property_tree::ptree &p) - : AMGCL_PARAMS_IMPORT_CHILD(p, coarsening), - AMGCL_PARAMS_IMPORT_CHILD(p, relax), - AMGCL_PARAMS_IMPORT_VALUE(p, coarse_enough), - AMGCL_PARAMS_IMPORT_VALUE(p, direct_coarse), - AMGCL_PARAMS_IMPORT_VALUE(p, max_levels), - AMGCL_PARAMS_IMPORT_VALUE(p, npre), - AMGCL_PARAMS_IMPORT_VALUE(p, npost), - AMGCL_PARAMS_IMPORT_VALUE(p, ncycle), - AMGCL_PARAMS_IMPORT_VALUE(p, pre_cycles), - AMGCL_PARAMS_IMPORT_VALUE(p, allow_rebuild) - { - check_params(p, {"coarsening", "relax", "coarse_enough", - "direct_coarse", "max_levels", "npre", "npost", - "ncycle", "pre_cycles", "allow_rebuild"}); - - precondition(max_levels > 0, "max_levels should be positive"); - } - - void get( - boost::property_tree::ptree &p, - const std::string &path = "" - ) const - { - AMGCL_PARAMS_EXPORT_CHILD(p, path, coarsening); - AMGCL_PARAMS_EXPORT_CHILD(p, path, relax); - AMGCL_PARAMS_EXPORT_VALUE(p, path, coarse_enough); - AMGCL_PARAMS_EXPORT_VALUE(p, path, direct_coarse); - AMGCL_PARAMS_EXPORT_VALUE(p, path, max_levels); - AMGCL_PARAMS_EXPORT_VALUE(p, path, npre); - AMGCL_PARAMS_EXPORT_VALUE(p, path, npost); - AMGCL_PARAMS_EXPORT_VALUE(p, path, ncycle); - AMGCL_PARAMS_EXPORT_VALUE(p, path, pre_cycles); - AMGCL_PARAMS_EXPORT_VALUE(p, path, allow_rebuild); - } -#endif - } prm; - - /// Builds the AMG hierarchy for the system matrix. - /** - * The input matrix is copied here and is safe to delete afterwards. - * - * \param A The system matrix. Should be convertible to - * amgcl::backend::crs<>. - * \param p AMG parameters. - * - * \sa amgcl/adapter/crs_tuple.hpp - */ - template - amg( - const Matrix &M, - const params &p = params(), - const backend_params &bprm = backend_params() - ) : prm(p) - { - auto A = std::make_shared(M); - sort_rows(*A); - - do_init(A, bprm); - } - - /// Builds the AMG hierarchy for the system matrix. - /** - * The shared pointer to the input matrix is passed here. The matrix - * will not be copied and should out-live the amg instance. - * The matrix should be either in amgcl::backend::crs format, or - * inherit from the class and override its ptr(), col(), and val() - * virtual functions. - * - * \param A The system matrix. - * \param p AMG parameters. - * - * \sa amgcl/adapter/crs_tuple.hpp - */ - amg( - std::shared_ptr A, - const params &p = params(), - const backend_params &bprm = backend_params() - ) : prm(p) - { - do_init(A, bprm); - } - - /// Rebuild the hierarchy using the new system matrix. - /** - * This requires for prm.allow_rebuild to be set. The transfer - * operators created during the initial setup are reused. - */ - template - void rebuild( - const Matrix &M, - const backend_params &bprm = backend_params() - ) - { - auto A = std::make_shared(M); - sort_rows(*A); - rebuild(A, bprm); - } - - /// Rebuild the hierarchy using the new system matrix. - /** - * This requires for prm.allow_rebuild to be set. The transfer - * operators created during the initial setup are reused. - */ - void rebuild( - std::shared_ptr A, - const backend_params &bprm = backend_params() - ) - { - precondition(prm.allow_rebuild, - "allow_rebuild is not set!"); - precondition( - backend::rows(*A) == backend::rows(system_matrix()) && - backend::cols(*A) == backend::rows(*A), - "Matrix dimensions differ from the original ones!" - ); - - AMGCL_TIC("rebuild"); - coarsening_type C(prm.coarsening); - for(auto &level : levels) { - A = level.rebuild(A, C, prm, bprm); - } - AMGCL_TOC("rebuild"); - } - - /// Performs single V-cycle for the given right-hand side and solution. - /** - * \param rhs Right-hand side vector. - * \param x Solution vector. - */ - template - void cycle(const Vec1 &rhs, Vec2 &&x) const { - cycle(levels.begin(), rhs, x); - } - - /// Performs single V-cycle after clearing x. - /** - * This is intended for use as a preconditioning procedure. - * - * \param rhs Right-hand side vector. - * \param x Solution vector. - */ - template - void apply(const Vec1 &rhs, Vec2 &&x) const { - if (prm.pre_cycles) { - backend::clear(x); - for(unsigned i = 0; i < prm.pre_cycles; ++i) - cycle(rhs, x); - } else { - backend::copy(rhs, x); - } - } - - /// Returns the system matrix from the finest level. - std::shared_ptr system_matrix_ptr() const { - return levels.front().A; - } - - const matrix& system_matrix() const { - return *system_matrix_ptr(); - } - - size_t bytes() const { - size_t b = 0; - for(const auto &lvl : levels) b += lvl.bytes(); - return b; - } - private: - struct level { - size_t m_rows, m_nonzeros; - - std::shared_ptr f; - std::shared_ptr u; - std::shared_ptr t; - - std::shared_ptr A; - std::shared_ptr P; - std::shared_ptr R; - - std::shared_ptr bP; - std::shared_ptr bR; - - std::shared_ptr< typename Backend::direct_solver > solve; - - std::shared_ptr relax; - - size_t bytes() const { - size_t b = 0; - - if (f) b += backend::bytes(*f); - if (u) b += backend::bytes(*u); - if (t) b += backend::bytes(*t); - - if (A) b += backend::bytes(*A); - if (P) b += backend::bytes(*P); - if (R) b += backend::bytes(*R); - - if (solve) b += backend::bytes(*solve); - if (relax) b += backend::bytes(*relax); - - return b; - } - - level() : m_rows(0), m_nonzeros(0) {} - - level(std::shared_ptr A, - params &prm, const backend_params &bprm) - : m_rows(backend::rows(*A)), m_nonzeros(backend::nonzeros(*A)) - { - AMGCL_TIC("move to backend"); - f = Backend::create_vector(m_rows, bprm); - u = Backend::create_vector(m_rows, bprm); - t = Backend::create_vector(m_rows, bprm); - this->A = Backend::copy_matrix(A, bprm); - AMGCL_TOC("move to backend"); - - AMGCL_TIC("relaxation"); - relax = std::make_shared(*A, prm.relax, bprm); - AMGCL_TOC("relaxation"); - } - - std::shared_ptr step_down( - std::shared_ptr A, - coarsening_type &C, const backend_params &bprm, - bool allow_rebuild) - { - AMGCL_TIC("transfer operators"); - std::shared_ptr P, R; - - try { - std::tie(P, R) = C.transfer_operators(*A); - } catch(error::empty_level) { - AMGCL_TOC("transfer operators"); - return std::shared_ptr(); - } - - sort_rows(*P); - sort_rows(*R); - - if (allow_rebuild) { - bP = P; - bR = R; - } - AMGCL_TOC("transfer operators"); - - AMGCL_TIC("move to backend"); - this->P = Backend::copy_matrix(P, bprm); - this->R = Backend::copy_matrix(R, bprm); - AMGCL_TOC("move to backend"); - - AMGCL_TIC("coarse operator"); - A = C.coarse_operator(*A, *P, *R); - sort_rows(*A); - AMGCL_TOC("coarse operator"); - - return A; - } - - void create_coarse( - std::shared_ptr A, - const backend_params &bprm, bool single_level) - { - m_rows = backend::rows(*A); - m_nonzeros = backend::nonzeros(*A); - - u = Backend::create_vector(m_rows, bprm); - f = Backend::create_vector(m_rows, bprm); - - solve = Backend::create_solver(A, bprm); - if (single_level) - this->A = Backend::copy_matrix(A, bprm); - } - - std::shared_ptr rebuild( - std::shared_ptr A, - const coarsening_type &C, - const params &prm, - const backend_params &bprm - ) - { - if (this->A) { - AMGCL_TIC("move to backend"); - this->A = Backend::copy_matrix(A, bprm); - AMGCL_TOC("move to backend"); - } - - if(relax) { - AMGCL_TIC("relaxation"); - relax = std::make_shared(*A, prm.relax, bprm); - AMGCL_TOC("relaxation"); - } - - if (solve) { - AMGCL_TIC("coarsest level"); - solve = Backend::create_solver(A, bprm); - AMGCL_TOC("coarsest level"); - } - - if (bP && bR) { - AMGCL_TIC("coarse operator"); - A = C.coarse_operator(*A, *bP, *bR); - sort_rows(*A); - AMGCL_TOC("coarse operator"); - } - - return A; - } - - size_t rows() const { - return m_rows; - } - - size_t nonzeros() const { - return m_nonzeros; - } - }; - - typedef typename std::list::const_iterator level_iterator; - - std::list levels; - - void do_init( - std::shared_ptr A, - const backend_params &bprm = backend_params() - ) - { - precondition( - backend::rows(*A) == backend::cols(*A), - "Matrix should be square!" - ); - - bool direct_coarse_solve = true; - - coarsening_type C(prm.coarsening); - - while( backend::rows(*A) > prm.coarse_enough) { - levels.push_back( level(A, prm, bprm) ); - - if (levels.size() >= prm.max_levels) break; - - A = levels.back().step_down(A, C, bprm, prm.allow_rebuild); - if (!A) { - // Zero-sized coarse level. Probably the system matrix on - // this level is diagonal, should be easily solvable with a - // couple of smoother iterations. - direct_coarse_solve = false; - break; - } - } - - if (!A || backend::rows(*A) > prm.coarse_enough) { - // The coarse matrix is still too big to be solved directly. - direct_coarse_solve = false; - } - - if (direct_coarse_solve) { - AMGCL_TIC("coarsest level"); - if (prm.direct_coarse) { - level l; - l.create_coarse(A, bprm, levels.empty()); - levels.push_back(l); - } else { - levels.push_back( level(A, prm, bprm) ); - } - AMGCL_TOC("coarsest level"); - } - } - - template - void cycle(level_iterator lvl, const Vec1 &rhs, Vec2 &x) const - { - level_iterator nxt = lvl, end = levels.end(); - ++nxt; - - if (nxt == end) { - if (lvl->solve) { - AMGCL_TIC("coarse"); - (*lvl->solve)(rhs, x); - AMGCL_TOC("coarse"); - } else { - AMGCL_TIC("relax"); - for(size_t i = 0; i < prm.npre; ++i) lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t); - for(size_t i = 0; i < prm.npost; ++i) lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t); - AMGCL_TOC("relax"); - } - } else { - for (size_t j = 0; j < prm.ncycle; ++j) { - AMGCL_TIC("relax"); - for(size_t i = 0; i < prm.npre; ++i) - lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t); - AMGCL_TOC("relax"); - - backend::residual(rhs, *lvl->A, x, *lvl->t); - - backend::spmv(math::identity(), *lvl->R, *lvl->t, math::zero(), *nxt->f); - - backend::clear(*nxt->u); - cycle(nxt, *nxt->f, *nxt->u); - - backend::spmv(math::identity(), *lvl->P, *nxt->u, math::identity(), x); - - AMGCL_TIC("relax"); - for(size_t i = 0; i < prm.npost; ++i) - lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t); - AMGCL_TOC("relax"); - } - } - } - - template class C, template class R> - friend std::ostream& operator<<(std::ostream &os, const amg &a); -}; - -/// Sends information about the AMG hierarchy to output stream. -template class C, template class R> -std::ostream& operator<<(std::ostream &os, const amg &a) -{ - typedef typename amg::level level; - ios_saver ss(os); - - size_t sum_dof = 0; - size_t sum_nnz = 0; - size_t sum_mem = 0; - - for(const level &lvl : a.levels) { - sum_dof += lvl.rows(); - sum_nnz += lvl.nonzeros(); - sum_mem += lvl.bytes(); - } - - os << "Number of levels: " << a.levels.size() - << "\nOperator complexity: " << std::fixed << std::setprecision(2) - << 1.0 * sum_nnz / a.levels.front().nonzeros() - << "\nGrid complexity: " << std::fixed << std::setprecision(2) - << 1.0 * sum_dof / a.levels.front().rows() - << "\nMemory footprint: " << human_readable_memory(sum_mem) - << "\n\n" - "level unknowns nonzeros memory\n" - "---------------------------------------------\n"; - - size_t depth = 0; - for(const level &lvl : a.levels) { - os << std::setw(5) << depth++ - << std::setw(13) << lvl.rows() - << std::setw(15) << lvl.nonzeros() - << std::setw(12) << human_readable_memory(lvl.bytes()) - << " (" << std::setw(5) << std::fixed << std::setprecision(2) - << 100.0 * lvl.nonzeros() / sum_nnz - << "%)" << std::endl; - } - - return os; -} - -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/blaze.hpp b/src/solvers/amgcl/backend/blaze.hpp deleted file mode 100644 index 6766d6a..0000000 --- a/src/solvers/amgcl/backend/blaze.hpp +++ /dev/null @@ -1,332 +0,0 @@ -#ifndef AMGCL_BACKEND_BLAZE_HPP -#define AMGCL_BACKEND_BLAZE_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/viennacl.hpp - * \author Denis Demidov - * \brief Blaze backend. - * - * Uses Blaze (https://code.google.com/p/blaze-lib) types and operations. - */ - -#include - -#include -#include - -namespace amgcl { -namespace backend { - -/// Blaze backend -/** - * This is a backend that uses types defined in the Blaze library - * (https://bitbucket.org/blaze-lib/blaze/src). - * - * \param real Value type. - * \ingroup backends - */ -template -struct blaze { - typedef real value_type; - typedef ptrdiff_t index_type; - typedef ptrdiff_t col_type; - typedef ptrdiff_t ptr_type; - - struct provides_row_iterator : std::true_type {}; - - typedef ::blaze::CompressedMatrix matrix; - typedef ::blaze::DynamicVector vector; - typedef ::blaze::DynamicVector matrix_diagonal; - typedef solver::skyline_lu direct_solver; - - /// Backend parameters. - typedef amgcl::detail::empty_params params; - - static std::string name() { return "blaze"; } - - /// Copy matrix from builtin backend. - static std::shared_ptr - copy_matrix( - std::shared_ptr< typename builtin::matrix > A, - const params& - ) - { - typedef - typename row_iterator::matrix>::type - row_iterator; - - const size_t n = rows(*A); - const size_t m = cols(*A); - - auto B = std::make_shared(n, m); - - B->reserve(nonzeros(*A)); - for(size_t i = 0; i < n; ++i) { - for(row_iterator a = A->row_begin(i); a; ++a) { - B->append(i, a.col(), a.value()); - } - B->finalize(i); - } - - return B; - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(typename builtin::vector const &x, const params&) - { - auto v = std::make_shared(x.size(), &x[0]); - return v; - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector( - std::shared_ptr< typename builtin::vector > x, - const params &prm - ) - { - return copy_vector(*x, prm); - } - - /// Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params&) - { - return std::make_shared(size); - } - - /// Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr< typename builtin::matrix > A, const params&) - { - return std::make_shared(*A); - } - -}; - -//--------------------------------------------------------------------------- -// Backend interface implementation -//--------------------------------------------------------------------------- -template < typename V, bool O > -struct value_type < ::blaze::CompressedMatrix > { - typedef V type; -}; - -template < typename V > -struct value_type < ::blaze::DynamicVector > { - typedef V type; -}; - -template < typename V, bool O > -struct cols_impl< ::blaze::CompressedMatrix > { - typedef ::blaze::CompressedMatrix matrix; - - static size_t get(const matrix &A) { - return A.columns(); - } -}; - -template < typename V, bool O > -struct nonzeros_impl< ::blaze::CompressedMatrix > { - typedef ::blaze::CompressedMatrix matrix; - - static size_t get(const matrix &A) { - return A.nonZeros(); - } -}; - -template < typename V, bool O > -struct row_iterator< ::blaze::CompressedMatrix > -{ - struct type { - typedef typename ::blaze::CompressedMatrix::ConstIterator Base; - Base base; - Base end; - - operator bool() const { - return base != end; - } - - type operator++() { - ++base; - return *this; - } - - size_t col() const { - return base->index(); - } - - V value() const { - return base->value(); - } - }; -}; - -template < typename V, bool O > -struct row_begin_impl< ::blaze::CompressedMatrix > { - typedef typename row_iterator< ::blaze::CompressedMatrix >::type iterator; - static iterator get(const ::blaze::CompressedMatrix &A, size_t row) { - return iterator{A.cbegin(row), A.cend(row)}; - } -}; - -template < class A, class B, typename V, bool O > -struct spmv_impl< - A, ::blaze::CompressedMatrix, ::blaze::DynamicVector, - B, ::blaze::DynamicVector - > -{ - typedef ::blaze::CompressedMatrix matrix; - typedef ::blaze::DynamicVector vector; - - static void apply(A alpha, const matrix &K, const vector &x, B beta, vector &y) - { - if (!math::is_zero(beta)) - y = alpha * (K * x) + beta * y; - else - y = alpha * (K * x); - } -}; - -template < typename V, bool O > -struct residual_impl< - ::blaze::CompressedMatrix, - ::blaze::DynamicVector, - ::blaze::DynamicVector, - ::blaze::DynamicVector - > -{ - typedef ::blaze::CompressedMatrix matrix; - typedef ::blaze::DynamicVector vector; - - static void apply(const vector &rhs, const matrix &A, const vector &x, - vector &r) - { - r = rhs - A * x; - } -}; - -template < typename V > -struct clear_impl< ::blaze::DynamicVector > -{ - typedef ::blaze::DynamicVector vector; - - static void apply(vector &x) - { - x = 0; - } -}; - -template < typename V > -struct copy_impl< - ::blaze::DynamicVector, - ::blaze::DynamicVector - > -{ - typedef ::blaze::DynamicVector vector; - - static void apply(const vector &x, vector &y) - { - y = x; - } -}; - -template < typename V > -struct inner_product_impl< - ::blaze::DynamicVector, - ::blaze::DynamicVector - > -{ - typedef ::blaze::DynamicVector vector; - - static V get(const vector &x, const vector &y) - { - return (x, y); - } -}; - -template < typename A, typename B, typename V > -struct axpby_impl< - A, ::blaze::DynamicVector, - B, ::blaze::DynamicVector - > -{ - typedef ::blaze::DynamicVector vector; - - static void apply(A a, const vector &x, B b, vector &y) - { - if (!math::is_zero(b)) - y = a * x + b * y; - else - y = a * x; - } -}; - -template < typename A, typename B, typename C, typename V > -struct axpbypcz_impl< - A, ::blaze::DynamicVector, - B, ::blaze::DynamicVector, - C, ::blaze::DynamicVector - > -{ - typedef ::blaze::DynamicVector vector; - - static void apply( - V a, const vector &x, - V b, const vector &y, - V c, vector &z - ) - { - if (!math::is_zero(c)) - z = a * x + b * y + c * z; - else - z = a * x + b * y; - } -}; - -template < typename A, typename B, typename V > -struct vmul_impl< - A, ::blaze::DynamicVector, ::blaze::DynamicVector, - B, ::blaze::DynamicVector - > -{ - typedef ::blaze::DynamicVector vector; - - static void apply(A a, const vector &x, const vector &y, B b, vector &z) - { - if (!math::is_zero(b)) - z = a * x * y + b * z; - else - z = a * x * y; - } -}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/block_crs.hpp b/src/solvers/amgcl/backend/block_crs.hpp deleted file mode 100644 index 657f559..0000000 --- a/src/solvers/amgcl/backend/block_crs.hpp +++ /dev/null @@ -1,313 +0,0 @@ -#ifndef AMGCL_BACKEND_BLOCK_CRS_HPP -#define AMGCL_BACKEND_BLOCK_CRS_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/block_crs.hpp - * \author Denis Demidov - * \brief Sparse matrix in block-CRS format. - */ - -#include -#include - -#include -#include -#include -#include - -namespace amgcl { -namespace backend { - -/// Sparse matrix in Block CRS format. -/** - * \param V Value type. - * \param C Column number type. - * \param P Index type. - */ -template < typename V, typename C, typename P > -struct bcrs { - typedef V value_type; - typedef V val_type; - typedef C col_type; - typedef P ptr_type; - - size_t block_size; - size_t nrows, ncols; - size_t brows, bcols; - - std::vector ptr; - std::vector col; - std::vector val; - - /// Converts matrix in CRS format to Block CRS format. - /** - * \param A Input matrix. - * \param block_size Block size. - * - * \note Input matrix dimensions are *not* required to be divisible by - * block_size. - */ - template < class Matrix > - bcrs(const Matrix &A, size_t block_size) - : block_size(block_size), nrows( rows(A) ), ncols( cols(A) ), - brows((nrows + block_size - 1) / block_size), - bcols((ncols + block_size - 1) / block_size), - ptr(brows + 1, 0) - { -#pragma omp parallel - { - std::vector marker(bcols, -1); - - // Count number of nonzeros in block matrix. -#pragma omp for - for(ptr_type ib = 0; ib < static_cast(brows); ++ib) { - ptr_type ia = ib * block_size; - - for(size_t k = 0; k < block_size && ia < static_cast(nrows); ++k, ++ia) { - for(auto a = backend::row_begin(A, ia); a; ++a) { - col_type cb = a.col() / block_size; - - if (marker[cb] != static_cast(ib)) { - marker[cb] = static_cast(ib); - ++ptr[ib + 1]; - } - } - } - } - -#pragma omp single - { - std::partial_sum(ptr.begin(), ptr.end(), ptr.begin()); - col.resize(ptr.back()); - val.resize(ptr.back() * block_size * block_size, 0); - } - - std::fill(marker.begin(), marker.end(), -1); - - // Fill the block matrix. -#pragma omp for - for(ptr_type ib = 0; ib < static_cast(brows); ++ib) { - ptr_type ia = ib * block_size; - ptr_type row_beg = ptr[ib]; - ptr_type row_end = row_beg; - - for(size_t k = 0; k < block_size && ia < static_cast(nrows); ++k, ++ia) { - for(auto a = backend::row_begin(A, ia); a; ++a) { - col_type cb = a.col() / block_size; - col_type cc = a.col() % block_size; - val_type va = a.value(); - - if (marker[cb] < row_beg) { - marker[cb] = row_end; - col[row_end] = cb; - val[block_size * (block_size * row_end + k) + cc] = va; - ++row_end; - } else { - val[block_size * (block_size * marker[cb] + k) + cc] = va; - } - } - } - } - } - } -}; - -/// block_crs backend definition. -/** - * \param real Value type. - * \ingroup backends - */ -template -struct block_crs { - typedef real value_type; - typedef ptrdiff_t index_type; - typedef ptrdiff_t col_type; - typedef ptrdiff_t ptr_type; - - typedef bcrs matrix; - typedef typename builtin::vector vector; - typedef typename builtin::vector matrix_diagonal; - typedef solver::skyline_lu direct_solver; - - struct provides_row_iterator : std::false_type {}; - - /// Backend parameters. - struct params { - /// Block size to use with the created matrices. - size_t block_size; - - params(size_t block_size = 4) : block_size(block_size) {} - -#ifndef AMGCL_NO_BOOST - params(const boost::property_tree::ptree &p) - : AMGCL_PARAMS_IMPORT_VALUE(p, block_size) - { - check_params(p, {"block_size"}); - } - void get(boost::property_tree::ptree &p, const std::string &path) const { - AMGCL_PARAMS_EXPORT_VALUE(p, path, block_size); - } -#endif - }; - - static std::string name() { return "block_crs"; } - - /// Copy matrix from builtin backend. - static std::shared_ptr - copy_matrix(std::shared_ptr< typename backend::builtin::matrix > A, - const params &prm) - { - return std::make_shared(*A, prm.block_size); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(const vector &x, const params&) - { - return std::make_shared(x); - } - - static std::shared_ptr< vector > - copy_vector(const std::vector &x, const params&) - { - return std::make_shared(x); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(std::shared_ptr< vector > x, const params&) - { - return x; - } - - /// Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params&) - { - return std::make_shared(size); - } - - static std::shared_ptr - create_solver( - std::shared_ptr< typename backend::builtin::matrix > A, - const params&) - { - return std::make_shared(*A); - } -}; - -//--------------------------------------------------------------------------- -// Specialization of backend interface -//--------------------------------------------------------------------------- -template < typename V, typename C, typename P > -struct rows_impl< bcrs > { - static size_t get(const bcrs &A) { - return A.nrows; - } -}; - -template < typename V, typename C, typename P > -struct cols_impl< bcrs > { - static size_t get(const bcrs &A) { - return A.ncols; - } -}; - -template < typename V, typename C, typename P > -struct nonzeros_impl< bcrs > { - static size_t get(const bcrs &A) { - return A.ptr.back() * A.block_size * A.block_size; - } -}; - -template < typename Alpha, typename Beta, typename V, typename C, typename P, class Vec1, class Vec2 > -struct spmv_impl< Alpha, bcrs, Vec1, Beta, Vec2 > -{ - typedef bcrs matrix; - - static void apply(Alpha alpha, const matrix &A, const Vec1 &x, Beta beta, Vec2 &y) - { - const size_t nb = A.brows; - const size_t na = A.nrows; - const size_t ma = A.ncols; - const size_t b1 = A.block_size; - const size_t b2 = b1 * b1; - - if (!math::is_zero(beta)) { - if (beta != 1) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(na); ++i) { - y[i] *= beta; - } - } - } else { - backend::clear(y); - } - -#pragma omp parallel for - for(ptrdiff_t ib = 0; ib < static_cast(nb); ++ib) { - for(P jb = A.ptr[ib], eb = A.ptr[ib + 1]; jb < eb; ++jb) { - size_t x0 = A.col[jb] * b1; - size_t y0 = ib * b1; - block_prod(b1, std::min(b1, ma - x0), std::min(b1, na - y0), - alpha, &A.val[jb * b2], &x[x0], &y[y0] - ); - } - } - } - - static void block_prod(size_t dim, size_t nx, size_t ny, - Alpha alpha, const V *A, const V *x, V *y) - { - for(size_t i = 0; i < ny; ++i, ++y) { - const V * xx = x; - V sum = 0; - for(size_t j = 0; j < dim; ++j, ++A, ++xx) - if (j < nx) sum += (*A) * (*xx); - *y += alpha * sum; - } - } -}; - -template < typename V, typename C, typename P, class Vec1, class Vec2, class Vec3 > -struct residual_impl< bcrs, Vec1, Vec2, Vec3 > -{ - typedef bcrs matrix; - - static void apply(const Vec1 &rhs, const matrix &A, const Vec2 &x, Vec3 &r) - { - typedef typename math::scalar_of::type S; - const auto one = math::identity(); - backend::copy(rhs, r); - backend::spmv(-one, A, x, one, r); - } -}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/builtin.hpp b/src/solvers/amgcl/backend/builtin.hpp deleted file mode 100644 index 90a254e..0000000 --- a/src/solvers/amgcl/backend/builtin.hpp +++ /dev/null @@ -1,1372 +0,0 @@ -#ifndef AMGCL_BACKEND_BUILTIN_HPP -#define AMGCL_BACKEND_BUILTIN_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/builtin.hpp - * \author Denis Demidov - * \brief Builtin backend. - */ - -#include -#include -#include -#include -#include - -#ifdef _OPENMP -# include -#endif - -#include -#include -#include -#include -#include -#include -#include - -namespace amgcl { -namespace backend { - -/// Sparse matrix stored in CRS format. -template < - typename val_t = double, - typename col_t = ptrdiff_t, - typename ptr_t = col_t - > -struct crs { - typedef val_t value_type; - typedef val_t val_type; - typedef col_t col_type; - typedef ptr_t ptr_type; - - size_t nrows, ncols, nnz; - ptr_type * ptr; - col_type * col; - val_type * val; - bool own_data; - - crs() : nrows(0), ncols(0), nnz(0), ptr(0), col(0), val(0), own_data(true) - {} - - template < - class PtrRange, - class ColRange, - class ValRange - > - crs(size_t nrows, size_t ncols, - const PtrRange &ptr_range, - const ColRange &col_range, - const ValRange &val_range - ) : - nrows(nrows), ncols(ncols), nnz(0), - ptr(0), col(0), val(0), own_data(true) - { - AMGCL_TIC("CSR copy"); - precondition(static_cast(nrows + 1) == std::distance( - std::begin(ptr_range), std::end(ptr_range)), - "ptr_range has wrong size in crs constructor"); - - nnz = ptr_range[nrows]; - - precondition(static_cast(nnz) == std::distance( - std::begin(col_range), std::end(col_range)), - "col_range has wrong size in crs constructor"); - - precondition(static_cast(nnz) == std::distance( - std::begin(val_range), std::end(val_range)), - "val_range has wrong size in crs constructor"); - - ptr = new ptr_type[nrows + 1]; - col = new col_type[nnz]; - val = new val_type[nnz]; - - ptr[0] = ptr_range[0]; -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { - ptr[i+1] = ptr_range[i+1]; - for(auto j = ptr_range[i]; j < ptr_range[i+1]; ++j) { - col[j] = col_range[j]; - val[j] = val_range[j]; - } - } - AMGCL_TOC("CSR copy"); - } - - template - crs(const Matrix &A) : - nrows(backend::rows(A)), ncols(backend::cols(A)), - nnz(0), ptr(0), col(0), val(0), own_data(true) - { - AMGCL_TIC("CSR copy"); - ptr = new ptr_type[nrows + 1]; - ptr[0] = 0; - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { - int row_width = 0; - for(auto a = backend::row_begin(A, i); a; ++a) ++row_width; - ptr[i+1] = row_width; - } - - nnz = scan_row_sizes(); - col = new col_type[nnz]; - val = new val_type[nnz]; - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { - ptr_type row_head = ptr[i]; - for(auto a = backend::row_begin(A, i); a; ++a) { - col[row_head] = a.col(); - val[row_head] = a.value(); - - ++row_head; - } - } - AMGCL_TOC("CSR copy"); - } - - crs(const crs &other) : - nrows(other.nrows), ncols(other.ncols), nnz(other.nnz), - ptr(0), col(0), val(0), own_data(true) - { - if (other.ptr && other.col && other.val) { - ptr = new ptr_type[nrows + 1]; - col = new col_type[nnz]; - val = new val_type[nnz]; - - ptr[0] = other.ptr[0]; -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { - ptr[i+1] = other.ptr[i+1]; - for(ptr_type j = other.ptr[i]; j < other.ptr[i+1]; ++j) { - col[j] = other.col[j]; - val[j] = other.val[j]; - } - } - } - } - - crs(crs &&other) : - nrows(other.nrows), ncols(other.ncols), nnz(other.nnz), - ptr(other.ptr), col(other.col), val(other.val), - own_data(other.own_data) - { - other.nrows = 0; - other.ncols = 0; - other.nnz = 0; - other.ptr = 0; - other.col = 0; - other.val = 0; - } - - const crs& operator=(const crs &other) { - free_data(); - - nrows = other.nrows; - ncols = other.ncols; - nnz = other.nnz; - - if (other.ptr && other.col && other.val) { - ptr = new ptr_type[nrows + 1]; - col = new col_type[nnz]; - val = new val_type[nnz]; - - ptr[0] = other.ptr[0]; -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { - ptr[i+1] = other.ptr[i+1]; - for(ptr_type j = other.ptr[i]; j < other.ptr[i+1]; ++j) { - col[j] = other.col[j]; - val[j] = other.val[j]; - } - } - } - - return *this; - } - - const crs& operator=(crs &&other) { - std::swap(nrows, other.nrows); - std::swap(ncols, other.ncols); - std::swap(nnz, other.nnz); - std::swap(ptr, other.ptr); - std::swap(col, other.col); - std::swap(val, other.val); - std::swap(own_data, other.own_data); - - return *this; - } - - void free_data() { - if (own_data) { - delete[] ptr; ptr = 0; - delete[] col; col = 0; - delete[] val; val = 0; - } - } - - void set_size(size_t n, size_t m, bool clean_ptr = false) { - precondition(!ptr, "matrix data has already been allocated!"); - - nrows = n; - ncols = m; - - ptr = new ptr_type[nrows + 1]; - - if (clean_ptr) { - ptr[0] = 0; -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) - ptr[i+1] = 0; - } - } - - ptr_type scan_row_sizes() { - std::partial_sum(ptr, ptr + nrows + 1, ptr); - return ptr[nrows]; - } - - void set_nonzeros() { - set_nonzeros(ptr[nrows]); - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { - ptrdiff_t row_beg = ptr[i]; - ptrdiff_t row_end = ptr[i+1]; - for(ptrdiff_t j = row_beg; j < row_end; ++j) { - col[j] = 0; - val[j] = math::zero(); - } - } - } - - void set_nonzeros(size_t n, bool need_values = true) { - precondition(!col && !val, "matrix data has already been allocated!"); - - nnz = n; - - col = new col_type[nnz]; - - if (need_values) - val = new val_type[nnz]; - } - - ~crs() { - free_data(); - } - - class row_iterator { - public: - row_iterator( - const col_type * col, - const col_type * end, - const val_type * val - ) : m_col(col), m_end(end), m_val(val) - {} - - operator bool() const { - return m_col < m_end; - } - - row_iterator& operator++() { - ++m_col; - ++m_val; - return *this; - } - - col_type col() const { - return *m_col; - } - - val_type value() const { - return *m_val; - } - - private: - const col_type * m_col; - const col_type * m_end; - const val_type * m_val; - }; - - row_iterator row_begin(size_t row) const { - ptr_type p = ptr[row]; - ptr_type e = ptr[row + 1]; - return row_iterator(col + p, col + e, val + p); - } - - size_t bytes() const { - if (own_data) { - return sizeof(ptr_type) * (nrows + 1) - + sizeof(col_type) * nnz - + sizeof(val_type) * nnz; - } else { - return 0; - } - } -}; - -/// Sort rows of the matrix column-wise. -template < typename V, typename C, typename P > -void sort_rows(crs &A) { - const size_t n = rows(A); - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - P beg = A.ptr[i]; - P end = A.ptr[i + 1]; - amgcl::detail::sort_row(A.col + beg, A.val + beg, end - beg); - } -} - -/// Transpose of a sparse matrix. -template < typename V, typename C, typename P > -std::shared_ptr< crs > transpose(const crs &A) -{ - const size_t n = rows(A); - const size_t m = cols(A); - const size_t nnz = nonzeros(A); - - auto T = std::make_shared< crs >(); - T->set_size(m, n, true); - - for(size_t j = 0; j < nnz; ++j) - ++( T->ptr[A.col[j] + 1] ); - - T->scan_row_sizes(); - T->set_nonzeros(); - - for(size_t i = 0; i < n; i++) { - for(P j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) { - P head = T->ptr[A.col[j]]++; - - T->col[head] = static_cast(i); - T->val[head] = math::adjoint(A.val[j]); - } - } - - std::rotate(T->ptr, T->ptr + m, T->ptr + m + 1); - T->ptr[0] = 0; - - return T; -} - -/// Matrix-matrix product. -template -std::shared_ptr< crs > -product(const crs &A, const crs &B, bool sort = false) { - auto C = std::make_shared< crs >(); - -#ifdef _OPENMP - int nt = omp_get_max_threads(); -#else - int nt = 1; -#endif - - if (nt > 16) { - spgemm_rmerge(A, B, *C); - } else { - spgemm_saad(A, B, *C, sort); - } - - return C; -} - -/// Sum of two matrices -template -std::shared_ptr< crs > -sum(Val alpha, const crs &A, Val beta, const crs &B, bool sort = false) { - typedef ptrdiff_t Idx; - - auto C = std::make_shared< crs >(); - precondition(A.nrows == B.nrows && A.ncols == B.ncols , "matrices should have same shape!"); - C->set_size(A.nrows, A.ncols); - - C->ptr[0] = 0; - -#pragma omp parallel - { - std::vector marker(C->ncols, -1); - -#pragma omp for - for(Idx i = 0; i < static_cast(C->nrows); ++i) { - Idx C_cols = 0; - - for(Idx j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { - Idx c = A.col[j]; - - if (marker[c] != i) { - marker[c] = i; - ++C_cols; - } - } - - for(Idx j = B.ptr[i], e = B.ptr[i+1]; j < e; ++j) { - Idx c = B.col[j]; - - if (marker[c] != i) { - marker[c] = i; - ++C_cols; - } - } - - C->ptr[i + 1] = C_cols; - } - } - - C->set_nonzeros(C->scan_row_sizes()); - -#pragma omp parallel - { - std::vector marker(C->ncols, -1); - -#pragma omp for - for(Idx i = 0; i < static_cast(C->nrows); ++i) { - Idx row_beg = C->ptr[i]; - Idx row_end = row_beg; - - for(Idx j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { - Idx c = A.col[j]; - Val v = alpha * A.val[j]; - - if (marker[c] < row_beg) { - marker[c] = row_end; - C->col[row_end] = c; - C->val[row_end] = v; - ++row_end; - } else { - C->val[marker[c]] += v; - } - } - - for(Idx j = B.ptr[i], e = B.ptr[i+1]; j < e; ++j) { - Idx c = B.col[j]; - Val v = beta * B.val[j]; - - if (marker[c] < row_beg) { - marker[c] = row_end; - C->col[row_end] = c; - C->val[row_end] = v; - ++row_end; - } else { - C->val[marker[c]] += v; - } - } - - if (sort) amgcl::detail::sort_row( - C->col + row_beg, C->val + row_beg, row_end - row_beg); - } - } - - return C; -} - -/// Scale matrix values. -template -void scale(crs &A, T s) { - ptrdiff_t n = backend::rows(A); - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) { - for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) - A.val[j] *= s; - } -} - -// Reduce matrix to a pointwise one -template -std::shared_ptr< - crs< typename math::scalar_of::type, col_type, ptr_type > - > -pointwise_matrix(const crs &A, unsigned block_size) { - typedef value_type V; - typedef typename math::scalar_of::type S; - - AMGCL_TIC("pointwise_matrix"); - const ptrdiff_t n = A.nrows; - const ptrdiff_t m = A.ncols; - const ptrdiff_t np = n / block_size; - const ptrdiff_t mp = m / block_size; - - precondition(np * block_size == n, - "Matrix size should be divisible by block_size"); - - auto ap = std::make_shared< crs >(); - auto &Ap = *ap; - - Ap.set_size(np, mp, true); - -#pragma omp parallel - { - std::vector j(block_size); - std::vector e(block_size); - - // Count number of nonzeros in block matrix. -#pragma omp for - for(ptrdiff_t ip = 0; ip < np; ++ip) { - ptrdiff_t ia = ip * block_size; - col_type cur_col = 0; - bool done = true; - - for(unsigned k = 0; k < block_size; ++k) { - ptr_type beg = j[k] = A.ptr[ia + k]; - ptr_type end = e[k] = A.ptr[ia + k + 1]; - - if (beg == end) continue; - - col_type c = A.col[beg]; - - if (done) { - done = false; - cur_col = c; - } else { - cur_col = std::min(cur_col, c); - } - } - - while(!done) { - cur_col /= block_size; - ++Ap.ptr[ip + 1]; - - done = true; - col_type col_end = (cur_col + 1) * block_size; - for(unsigned k = 0; k < block_size; ++k) { - ptr_type beg = j[k]; - ptr_type end = e[k]; - - while(beg < end) { - col_type c = A.col[beg++]; - - if (c >= col_end) { - if (done) { - done = false; - cur_col = c; - } else { - cur_col = std::min(cur_col, c); - } - - break; - } - } - - j[k] = beg; - } - } - } - } - - Ap.set_nonzeros(Ap.scan_row_sizes()); - -#pragma omp parallel - { - std::vector j(block_size); - std::vector e(block_size); - -#pragma omp for - for(ptrdiff_t ip = 0; ip < np; ++ip) { - ptrdiff_t ia = ip * block_size; - col_type cur_col = 0; - ptr_type head = Ap.ptr[ip]; - bool done = true; - - for(unsigned k = 0; k < block_size; ++k) { - ptr_type beg = j[k] = A.ptr[ia + k]; - ptr_type end = e[k] = A.ptr[ia + k + 1]; - - if (beg == end) continue; - - col_type c = A.col[beg]; - - if (done) { - done = false; - cur_col = c; - } else { - cur_col = std::min(cur_col, c); - } - } - - while(!done) { - cur_col /= block_size; - - Ap.col[head] = cur_col; - - done = true; - bool first = true; - S cur_val = math::zero(); - - col_type col_end = (cur_col + 1) * block_size; - for(unsigned k = 0; k < block_size; ++k) { - ptr_type beg = j[k]; - ptr_type end = e[k]; - - while(beg < end) { - col_type c = A.col[beg]; - S v = math::norm(A.val[beg]); - ++beg; - - if (c >= col_end) { - if (done) { - done = false; - cur_col = c; - } else { - cur_col = std::min(cur_col, c); - } - - break; - } - - - if (first) { - first = false; - cur_val = v; - } else { - cur_val = std::max(cur_val, v); - } - } - - j[k] = beg; - } - - Ap.val[head++] = cur_val; - } - } - } - - AMGCL_TOC("pointwise_matrix"); - return ap; -} - -/** NUMA-aware vector container. */ -template -class numa_vector { - public: - typedef T value_type; - - numa_vector() : n(0), p(0) {} - - numa_vector(size_t n, bool init = true) : n(n), p(new T[n]) { - if (init) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) - p[i] = math::zero(); - } - } - - void resize(size_t size, bool init = true) { - delete[] p; p = 0; - - n = size; - p = new T[n]; - - if (init) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) - p[i] = math::zero(); - } - } - - template - numa_vector(const Vector &other, - typename std::enable_if::value, int>::type = 0 - ) : n(other.size()), p(new T[n]) - { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) - p[i] = other[i]; - } - - template - numa_vector(Iterator beg, Iterator end) - : n(std::distance(beg, end)), p(new T[n]) - { - static_assert(std::is_same< - std::random_access_iterator_tag, - typename std::iterator_traits::iterator_category - >::value, - "Iterator has to be random access"); - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) - p[i] = beg[i]; - } - - ~numa_vector() { - delete[] p; p = 0; - } - - inline size_t size() const { - return n; - } - - inline const T& operator[](size_t i) const { - return p[i]; - } - - inline T& operator[](size_t i) { - return p[i]; - } - - inline const T* data() const { - return p; - } - - inline T* data() { - return p; - } - - void swap(numa_vector &other) { - std::swap(n, other.n); - std::swap(p, other.p); - } - - private: - size_t n; - T *p; -}; - -/// Diagonal of a matrix -template < typename V, typename C, typename P > -std::shared_ptr< numa_vector > diagonal(const crs &A, bool invert = false) -{ - const size_t n = rows(A); - auto dia = std::make_shared< numa_vector >(n, false); - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - for(auto a = A.row_begin(i); a; ++a) { - if (a.col() == i) { - V d = a.value(); - if (invert) { - d = math::is_zero(d) ? math::identity() : math::inverse(d); - } - (*dia)[i] = d; - break; - } - } - } - - return dia; -} - -// Estimate spectral radius of the matrix. -// Use Gershgorin disk theorem when power_iters == 0, -// Use Power method when power_iters > 0. -// When scale = true, scale the matrix by its inverse diagonal. -template -static typename math::scalar_of::type>::type -spectral_radius(const Matrix &A, int power_iters = 0) { - AMGCL_TIC("spectral radius"); - typedef typename backend::value_type::type value_type; - typedef typename math::rhs_of::type rhs_type; - typedef typename math::scalar_of::type scalar_type; - - const ptrdiff_t n = backend::rows(A); - scalar_type radius; - - if (power_iters <= 0) { - // Use Gershgorin disk theorem. - radius = 0; - -#pragma omp parallel - { - scalar_type emax = 0; - value_type dia = math::identity(); - -#pragma omp for nowait - for(ptrdiff_t i = 0; i < n; ++i) { - scalar_type s = 0; - - for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { - ptrdiff_t c = A.col[j]; - value_type v = A.val[j]; - - s += math::norm(v); - - if (scale && c == i) dia = v; - } - - if (scale) s *= math::norm(math::inverse(dia)); - - emax = std::max(emax, s); - } - -#pragma omp critical - radius = std::max(radius, emax); - } - } else { - // Power method. - backend::numa_vector b0(n, false), b1(n, false); - - // Fill the initial vector with random values. - // Also extract the inverted matrix diagonal values. - scalar_type b0_norm = 0; -#pragma omp parallel - { -#ifdef _OPENMP - int tid = omp_get_thread_num(); -#else - int tid = 0; -#endif - std::mt19937 rng(tid); - std::uniform_real_distribution rnd(-1, 1); - - scalar_type loc_norm = 0; - -#pragma omp for nowait - for(ptrdiff_t i = 0; i < n; ++i) { - rhs_type v = math::constant(rnd(rng)); - - b0[i] = v; - loc_norm += math::norm(math::inner_product(v,v)); - } - -#pragma omp critical - b0_norm += loc_norm; - } - - // Normalize b0 - b0_norm = 1 / sqrt(b0_norm); -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) { - b0[i] = b0_norm * b0[i]; - } - - for(int iter = 0; iter < power_iters;) { - // b1 = scale ? (D^1 * A) * b0 : A * b0 - // b1_norm = ||b1|| - // radius = - scalar_type b1_norm = 0; - radius = 0; -#pragma omp parallel - { - scalar_type loc_norm = 0; - scalar_type loc_radi = 0; - value_type dia = math::identity(); - -#pragma omp for nowait - for(ptrdiff_t i = 0; i < n; ++i) { - rhs_type s = math::zero(); - - for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { - ptrdiff_t c = A.col[j]; - value_type v = A.val[j]; - if (scale && c == i) dia = v; - s += v * b0[c]; - } - - if (scale) s = math::inverse(dia) * s; - - loc_norm += math::norm(math::inner_product(s, s)); - loc_radi += math::norm(math::inner_product(s, b0[i])); - - b1[i] = s; - } - -#pragma omp critical - { - b1_norm += loc_norm; - radius += loc_radi; - } - } - - if (++iter < power_iters) { - // b0 = b1 / b1_norm - b1_norm = 1 / sqrt(b1_norm); -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) { - b0[i] = b1_norm * b1[i]; - } - } - } - } - AMGCL_TOC("spectral radius"); - - return radius < 0 ? static_cast(2) : radius; -} - -/** - * The builtin backend does not have any dependencies, and uses OpenMP for - * parallelization. Matrices are stored in the CRS format, and vectors are - * instances of ``std::vector``. There is no usual overhead of - * moving the constructed hierarchy to the builtin backend, since the backend - * is used internally during setup. - */ -template -struct builtin { - typedef ValueType value_type; - typedef ColumnType index_type; - typedef ColumnType col_type; - typedef PointerType ptr_type; - - typedef typename math::rhs_of::type rhs_type; - - struct provides_row_iterator : std::true_type {}; - - typedef crs matrix; - typedef numa_vector vector; - typedef numa_vector matrix_diagonal; - typedef solver::skyline_lu direct_solver; - - /// The backend has no parameters. - typedef amgcl::detail::empty_params params; - - static std::string name() { return "builtin"; } - - // Copy matrix. This is a noop for builtin backend. - static std::shared_ptr - copy_matrix(std::shared_ptr A, const params&) - { - return A; - } - - // Copy vector to builtin backend. - template - static std::shared_ptr< numa_vector > - copy_vector(const std::vector &x, const params&) - { - return std::make_shared< numa_vector >(x); - } - - // Copy vector to builtin backend. This is a noop for builtin backend. - template - static std::shared_ptr< numa_vector > - copy_vector(std::shared_ptr< numa_vector > x, const params&) - { - return x; - } - - // Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params&) - { - return std::make_shared(size); - } - - struct gather { - std::vector I; - - gather(size_t /*size*/, const std::vector &I, const params&) - : I(I) { } - - template - void operator()(const InVec &vec, OutVec &vals) const { - for(size_t i = 0; i < I.size(); ++i) - vals[i] = vec[I[i]]; - } - }; - - struct scatter { - std::vector I; - - scatter(size_t /*size*/, const std::vector &I, const params&) - : I(I) { } - - template - void operator()(const InVec &vals, OutVec &vec) const { - for(size_t i = 0; i < I.size(); ++i) - vec[I[i]] = vals[i]; - } - }; - - // Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr A, const params&) { - return std::make_shared(*A); - } -}; - -template -struct is_builtin_vector : std::false_type {}; - -template -struct is_builtin_vector< std::vector > : std::is_arithmetic {}; - -template -struct is_builtin_vector< numa_vector > : std::true_type {}; - -//--------------------------------------------------------------------------- -// Specialization of backend interface -//--------------------------------------------------------------------------- -template -struct backends_compatible< builtin, builtin > : std::true_type {}; - -template < typename V, typename C, typename P > -struct rows_impl< crs > { - static size_t get(const crs &A) { - return A.nrows; - } -}; - -template < typename V, typename C, typename P > -struct cols_impl< crs > { - static size_t get(const crs &A) { - return A.ncols; - } -}; - -template < class Vec > -struct bytes_impl< - Vec, - typename std::enable_if< is_builtin_vector::value >::type - > -{ - static size_t get(const Vec &x) { - typedef typename backend::value_type::type V; - return x.size() * sizeof(V); - } -}; - -template < typename V, typename C, typename P > -struct ptr_data_impl< crs > { - typedef const P* type; - static type get(const crs &A) { - return &A.ptr[0]; - } -}; - -template < typename V, typename C, typename P > -struct col_data_impl< crs > { - typedef const C* type; - static type get(const crs &A) { - return &A.col[0]; - } -}; - -template < typename V, typename C, typename P > -struct val_data_impl< crs > { - typedef const V* type; - static type get(const crs &A) { - return &A.val[0]; - } -}; - -template < typename V, typename C, typename P > -struct nonzeros_impl< crs > { - static size_t get(const crs &A) { - return A.nrows == 0 ? 0 : A.ptr[A.nrows]; - } -}; - -template < typename V, typename C, typename P > -struct row_nonzeros_impl< crs > { - static size_t get(const crs &A, size_t row) { - return A.ptr[row + 1] - A.ptr[row]; - } -}; - -template < class Vec > -struct clear_impl< - Vec, - typename std::enable_if< is_builtin_vector::value >::type - > -{ - static void apply(Vec &x) - { - typedef typename backend::value_type::type V; - - const size_t n = x.size(); -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - x[i] = math::zero(); - } - } -}; - -template < class Vec1, class Vec2 > -struct inner_product_impl< - Vec1, Vec2, - typename std::enable_if< - is_builtin_vector::value && - is_builtin_vector::value - >::type - > -{ - typedef typename value_type::type V; - - typedef typename math::inner_product_impl::return_type return_type; - - static return_type get(const Vec1 &x, const Vec2 &y) { -#ifdef _OPENMP - if (omp_get_max_threads() > 1) { - return parallel(x, y); - } else -#endif - { - return serial(x, y); - } - } - - static return_type serial(const Vec1 &x, const Vec2 &y) { - const size_t n = x.size(); - - return_type s = math::zero(); - return_type c = math::zero(); - - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - return_type d = math::inner_product(x[i], y[i]) - c; - return_type t = s + d; - c = (t - s) - d; - s = t; - } - - return s; - } - -#ifdef _OPENMP -# ifndef AMGCL_MAX_OPENMP_THREADS -# define AMGCL_MAX_OPENMP_THREADS 64 -# endif - static return_type parallel(const Vec1 &x, const Vec2 &y) - { - const size_t n = x.size(); - return_type _sum_stat[AMGCL_MAX_OPENMP_THREADS]; - std::vector _sum_dyna; - return_type *sum; - - const int nt = omp_get_max_threads(); - - if (nt < 64) { - sum = _sum_stat; - for(int i = 0; i < nt; ++i) { - sum[i] = math::zero(); - } - } else { - _sum_dyna.resize(nt, math::zero()); - sum = _sum_dyna.data(); - } - -#pragma omp parallel - { - const int tid = omp_get_thread_num(); - - return_type s = math::zero(); - return_type c = math::zero(); - -#pragma omp for nowait - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - return_type d = math::inner_product(x[i], y[i]) - c; - return_type t = s + d; - c = (t - s) - d; - s = t; - } - - sum[tid] = s; - } - - return std::accumulate(sum, sum + nt, math::zero()); - } -#endif -}; - -template -struct axpby_impl< - A, Vec1, B, Vec2, - typename std::enable_if< - is_builtin_vector::value && - is_builtin_vector::value - >::type - > -{ - static void apply(A a, const Vec1 &x, B b, Vec2 &y) - { - const size_t n = x.size(); - if (!math::is_zero(b)) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - y[i] = a * x[i] + b * y[i]; - } - } else { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - y[i] = a * x[i]; - } - } - } -}; - -template < class A, class Vec1, class B, class Vec2, class C, class Vec3 > -struct axpbypcz_impl< - A, Vec1, B, Vec2, C, Vec3, - typename std::enable_if< - is_builtin_vector::value && - is_builtin_vector::value && - is_builtin_vector::value - >::type - > -{ - static void apply(A a, const Vec1 &x, B b, const Vec2 &y, C c, Vec3 &z) - { - const size_t n = x.size(); - if (!math::is_zero(c)) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - z[i] = a * x[i] + b * y[i] + c * z[i]; - } - } else { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - z[i] = a * x[i] + b * y[i]; - } - } - } -}; - -template < class Alpha, class Vec1, class Vec2, class Beta, class Vec3 > -struct vmul_impl< - Alpha, Vec1, Vec2, Beta, Vec3, - typename std::enable_if< - is_builtin_vector::value && - is_builtin_vector::value && - is_builtin_vector::value && - math::static_rows::type>::value == math::static_rows::type>::value && - math::static_rows::type>::value == math::static_rows::type>::value - >::type - > -{ - static void apply(Alpha a, const Vec1 &x, const Vec2 &y, Beta b, Vec3 &z) - { - const size_t n = x.size(); - if (!math::is_zero(b)) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - z[i] = a * x[i] * y[i] + b * z[i]; - } - } else { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - z[i] = a * x[i] * y[i]; - } - } - } -}; - -// Support for mixed scalar/nonscalar types -template < class Alpha, class Vec1, class Vec2, class Beta, class Vec3 > -struct vmul_impl< - Alpha, Vec1, Vec2, Beta, Vec3, - typename std::enable_if< - is_builtin_vector::value && - is_builtin_vector::value && - is_builtin_vector::value && - ( - math::static_rows::type>::value != math::static_rows::type>::value || - math::static_rows::type>::value != math::static_rows::type>::value - ) - >::type - > -{ - static void apply(Alpha a, const Vec1 &x, const Vec2 &y, Beta b, Vec3 &z) - { - typedef typename value_type::type M_type; - auto Y = backend::reinterpret_as_rhs(y); - auto Z = backend::reinterpret_as_rhs(z); - - const size_t n = x.size(); - - if (!math::is_zero(b)) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - Z[i] = a * x[i] * Y[i] + b * Z[i]; - } - } else { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - Z[i] = a * x[i] * Y[i]; - } - } - } -}; - -template < class Vec1, class Vec2 > -struct copy_impl< - Vec1, Vec2, - typename std::enable_if< - is_builtin_vector::value && - is_builtin_vector::value - >::type - > -{ - static void apply(const Vec1 &x, Vec2 &y) - { - const size_t n = x.size(); -#pragma omp parallel for - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { - y[i] = x[i]; - } - } -}; - -template -struct reinterpret_as_rhs_impl< - MatrixValue, Vector, IsConst, - typename std::enable_if::value>::type - > -{ - typedef typename backend::value_type::type src_type; - typedef typename math::scalar_of::type scalar_type; - typedef typename math::rhs_of::type rhs_type; - typedef typename math::replace_scalar::type dst_type; - typedef typename std::conditional::type ptr_type; - typedef iterator_range return_type; - - template - static return_type get(V &&x) { - auto ptr = reinterpret_cast(&x[0]); - const size_t n = x.size() * sizeof(src_type) / sizeof(dst_type); - return make_iterator_range(ptr, ptr + n); - } -}; - -namespace detail { - -template -struct use_builtin_matrix_ops< amgcl::backend::crs > - : std::true_type -{}; - -} // namespace detail - -} // namespace backend -} // namespace amgcl - - -// Allow to use boost::iterator_range as vector in builtin backend: -namespace boost { template class iterator_range; } - -namespace amgcl { -namespace backend { - -template -struct is_builtin_vector< amgcl::iterator_range > : std::true_type {}; - -template -struct is_builtin_vector< boost::iterator_range > : std::true_type {}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/builtin_hybrid.hpp b/src/solvers/amgcl/backend/builtin_hybrid.hpp deleted file mode 100644 index b512b20..0000000 --- a/src/solvers/amgcl/backend/builtin_hybrid.hpp +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef AMGCL_BACKEND_BUILTIN_HYBRID_HPP -#define AMGCL_BACKEND_BUILTIN_HYBRID_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/builtin_hybrid.hpp - * \author Denis Demidov - * \brief Builtin backend that uses scalar matrices to build the hierarchy, but stores the computed matrix in block format. - */ - -#include -#include -#include - -namespace amgcl { -namespace backend { - -// Hybrid backend uses scalar matrices to build the hierarchy, -// but stores the computed matrices in the block format. -template -struct builtin_hybrid : public builtin::type, ColumnType, PointerType> -{ - typedef typename math::scalar_of::type ScalarType; - typedef builtin Base; - typedef crs matrix; - struct provides_row_iterator : std::false_type {}; - - static std::shared_ptr - copy_matrix(std::shared_ptr As, const typename Base::params&) - { - return std::make_shared(amgcl::adapter::block_matrix(*As)); - } -}; - -template -struct backends_compatible< builtin_hybrid, builtin_hybrid > : std::true_type {}; - -template -struct backends_compatible< builtin, builtin_hybrid > : std::true_type {}; - -template -struct backends_compatible< builtin_hybrid, builtin > : std::true_type {}; - -} // namespace backend -} // namespace amgcl -#endif diff --git a/src/solvers/amgcl/backend/cuda.hpp b/src/solvers/amgcl/backend/cuda.hpp deleted file mode 100644 index a0b45dc..0000000 --- a/src/solvers/amgcl/backend/cuda.hpp +++ /dev/null @@ -1,753 +0,0 @@ -#ifndef AMGCL_BACKEND_CUDA_HPP -#define AMGCL_BACKEND_CUDA_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/cuda.hpp - * \author Denis Demidov - * \brief CUDA backend. - */ - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace amgcl { - -namespace solver { - -/** Wrapper around solver::skyline_lu for use with the CUDA backend. - * Copies the rhs to the host memory, solves the problem using the host CPU, - * then copies the solution back to the compute device(s). - */ -template -struct cuda_skyline_lu : solver::skyline_lu { - typedef solver::skyline_lu Base; - - mutable std::vector _rhs, _x; - - template - cuda_skyline_lu(const Matrix &A, const Params&) - : Base(*A), _rhs(backend::rows(*A)), _x(backend::rows(*A)) - { } - - template - void operator()(const Vec1 &rhs, Vec2 &x) const { - thrust::copy(rhs.begin(), rhs.end(), _rhs.begin()); - static_cast(this)->operator()(_rhs, _x); - thrust::copy(_x.begin(), _x.end(), x.begin()); - } - - size_t bytes() const { - return - backend::bytes(*static_cast(this)) + - backend::bytes(_rhs) + - backend::bytes(_x); - } -}; - -} - -namespace backend { -namespace detail { - -inline void cuda_check(cusparseStatus_t rc, const char *file, int line) { - if (rc != CUSPARSE_STATUS_SUCCESS) { - std::ostringstream msg; - msg << "CUDA error " << rc << " at \"" << file << ":" << line; - precondition(false, msg.str()); - } -} - -inline void cuda_check(cudaError_t rc, const char *file, int line) { - if (rc != cudaSuccess) { - std::ostringstream msg; - msg << "CUDA error " << rc << " at \"" << file << ":" << line; - precondition(false, msg.str()); - } -} - -#define AMGCL_CALL_CUDA(rc) \ - amgcl::backend::detail::cuda_check(rc, __FILE__, __LINE__) - -struct cuda_deleter { - void operator()(cusparseMatDescr_t handle) { - AMGCL_CALL_CUDA( cusparseDestroyMatDescr(handle) ); - } - -#if CUDART_VERSION < 11000 - void operator()(cusparseHybMat_t handle) { - AMGCL_CALL_CUDA( cusparseDestroyHybMat(handle) ); - } -#endif - - void operator()(csrilu02Info_t handle) { - AMGCL_CALL_CUDA( cusparseDestroyCsrilu02Info(handle) ); - } - - void operator()(csrsv2Info_t handle) { - AMGCL_CALL_CUDA( cusparseDestroyCsrsv2Info(handle) ); - } - - void operator()(cudaEvent_t handle) { - AMGCL_CALL_CUDA( cudaEventDestroy(handle) ); - } -}; - - -} // namespace detail - -#if CUDART_VERSION >= 11000 -/// CUSPARSE matrix in CSR format. -template -class cuda_matrix { - public: - typedef real value_type; - - cuda_matrix( - size_t n, size_t m, - const ptrdiff_t *ptr, - const ptrdiff_t *col, - const real *val, - cusparseHandle_t handle - ) - : nrows(n), ncols(m), nnz(ptr[n]), handle(handle), - ptr(ptr, ptr + n + 1), col(col, col + nnz), val(val, val + nnz), - desc(create_description(), backend::detail::cuda_deleter()) - { - } - - void spmv( - real alpha, thrust::device_vector const &x, - real beta, thrust::device_vector &y - ) const - { - size_t buf_size; - AMGCL_CALL_CUDA( - cusparseCsrmvEx_bufferSize( - handle, - CUSPARSE_ALG_MERGE_PATH, - CUSPARSE_OPERATION_NON_TRANSPOSE, - nrows, - ncols, - nnz, - &alpha, datatype(), - desc.get(), - thrust::raw_pointer_cast(&val[0]), datatype(), - thrust::raw_pointer_cast(&ptr[0]), - thrust::raw_pointer_cast(&col[0]), - thrust::raw_pointer_cast(&x[0]), datatype(), - &beta, datatype(), - thrust::raw_pointer_cast(&y[0]), datatype(), - datatype(), - &buf_size) - ); - - if (buf.size() < buf_size) - buf.resize(buf_size); - - AMGCL_CALL_CUDA( - cusparseCsrmvEx( - handle, - CUSPARSE_ALG_MERGE_PATH, - CUSPARSE_OPERATION_NON_TRANSPOSE, - nrows, - ncols, - nnz, - &alpha, datatype(), - desc.get(), - thrust::raw_pointer_cast(&val[0]), datatype(), - thrust::raw_pointer_cast(&ptr[0]), - thrust::raw_pointer_cast(&col[0]), - thrust::raw_pointer_cast(&x[0]), datatype(), - &beta, datatype(), - thrust::raw_pointer_cast(&y[0]), datatype(), - datatype(), - thrust::raw_pointer_cast(&buf[0]) - ) - ); - } - - size_t rows() const { return nrows; } - size_t cols() const { return ncols; } - size_t nonzeros() const { return nnz; } - size_t bytes() const { - return - sizeof(int) * (nrows + 1) + - sizeof(int) * nnz + - sizeof(real) * nnz; - } - private: - size_t nrows, ncols, nnz; - - cusparseHandle_t handle; - - std::shared_ptr::type> desc; - - thrust::device_vector ptr; - thrust::device_vector col; - thrust::device_vector val; - - mutable thrust::device_vector buf; - - static cusparseMatDescr_t create_description() { - cusparseMatDescr_t desc; - AMGCL_CALL_CUDA( cusparseCreateMatDescr(&desc) ); - AMGCL_CALL_CUDA( cusparseSetMatType(desc, CUSPARSE_MATRIX_TYPE_GENERAL) ); - AMGCL_CALL_CUDA( cusparseSetMatIndexBase(desc, CUSPARSE_INDEX_BASE_ZERO) ); - return desc; - } - - static cudaDataType datatype() { - if (sizeof(real) == sizeof(float)) - return CUDA_R_32F; - else - return CUDA_R_64F; - } -}; - -#else // CUDART_VERSION >= 11000 - -/// CUSPARSE matrix in Hyb format. -template -class cuda_matrix { - public: - typedef real value_type; - - cuda_matrix( - size_t n, size_t m, - const ptrdiff_t *ptr, - const ptrdiff_t *col, - const real *val, - cusparseHandle_t handle - ) - : nrows(n), ncols(m), nnz(ptr[n]), handle( handle ), - desc ( create_description(), backend::detail::cuda_deleter() ), - mat ( create_matrix(), backend::detail::cuda_deleter() ) - { - fill_matrix(n, m, ptr, col, val); - } - - void spmv( - real alpha, thrust::device_vector const &x, - real beta, thrust::device_vector &y - ) const - { - spmv(alpha, x, beta, y, std::integral_constant()); - } - - void spmv( - real alpha, thrust::device_vector const &x, - real beta, thrust::device_vector &y, - std::false_type - ) const - { - AMGCL_CALL_CUDA( - cusparseShybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, desc.get(), mat.get(), - thrust::raw_pointer_cast(&x[0]), &beta, - thrust::raw_pointer_cast(&y[0]) - ) - ); - } - - void spmv( - real alpha, thrust::device_vector const &x, - real beta, thrust::device_vector &y, - std::true_type - ) const - { - AMGCL_CALL_CUDA( - cusparseDhybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, desc.get(), mat.get(), - thrust::raw_pointer_cast(&x[0]), &beta, - thrust::raw_pointer_cast(&y[0]) - ) - ); - } - - size_t rows() const { return nrows; } - size_t cols() const { return ncols; } - size_t nonzeros() const { return nnz; } - size_t bytes() const { - return - sizeof(int) * (nrows + 1) + - sizeof(int) * nnz + - sizeof(real) * nnz; - } - private: - size_t nrows, ncols, nnz; - - cusparseHandle_t handle; - - std::shared_ptr::type> desc; - std::shared_ptr::type> mat; - - static cusparseMatDescr_t create_description() { - cusparseMatDescr_t desc; - AMGCL_CALL_CUDA( cusparseCreateMatDescr(&desc) ); - AMGCL_CALL_CUDA( cusparseSetMatType(desc, CUSPARSE_MATRIX_TYPE_GENERAL) ); - AMGCL_CALL_CUDA( cusparseSetMatIndexBase(desc, CUSPARSE_INDEX_BASE_ZERO) ); - return desc; - } - - static cusparseHybMat_t create_matrix() { - cusparseHybMat_t mat; - AMGCL_CALL_CUDA( cusparseCreateHybMat(&mat) ); - return mat; - } - - void fill_matrix(size_t n, size_t m, - const ptrdiff_t *ptr, const ptrdiff_t *col, const float *val - ) - { - thrust::device_vector p(ptr, ptr + n + 1); - thrust::device_vector c(col, col + ptr[n]); - thrust::device_vector v(val, val + ptr[n]); - - AMGCL_CALL_CUDA( - cusparseScsr2hyb(handle, n, m, desc.get(), - thrust::raw_pointer_cast(&v[0]), - thrust::raw_pointer_cast(&p[0]), - thrust::raw_pointer_cast(&c[0]), - mat.get(), 0, CUSPARSE_HYB_PARTITION_AUTO - ) - ); - } - - void fill_matrix(size_t n, size_t m, - const ptrdiff_t *ptr, const ptrdiff_t *col, const double *val - ) - { - thrust::device_vector p(ptr, ptr + n + 1); - thrust::device_vector c(col, col + ptr[n]); - thrust::device_vector v(val, val + ptr[n]); - - AMGCL_CALL_CUDA( - cusparseDcsr2hyb(handle, n, m, desc.get(), - thrust::raw_pointer_cast(&v[0]), - thrust::raw_pointer_cast(&p[0]), - thrust::raw_pointer_cast(&c[0]), - mat.get(), 0, CUSPARSE_HYB_PARTITION_AUTO - ) - ); - } -}; - -#endif // CUDART_VERSION >= 11000 - -/// CUDA backend. -/** - * Uses CUSPARSE for matrix operations and Thrust for vector operations. - * - * \param real Value type. - * \ingroup backends - */ -template > -struct cuda { - static_assert( - std::is_same::value || - std::is_same::value, - "Unsupported value type for cuda backend" - ); - - typedef real value_type; - typedef ptrdiff_t col_type; - typedef ptrdiff_t ptr_type; - typedef cuda_matrix matrix; - typedef thrust::device_vector vector; - typedef thrust::device_vector matrix_diagonal; - typedef DirectSolver direct_solver; - - struct provides_row_iterator : std::false_type {}; - - /// Backend parameters. - struct params { - /// CUSPARSE handle. - cusparseHandle_t cusparse_handle; - - params(cusparseHandle_t handle = 0) : cusparse_handle(handle) {} - -#ifndef AMGCL_NO_BOOST - params(const boost::property_tree::ptree &p) - : AMGCL_PARAMS_IMPORT_VALUE(p, cusparse_handle) - { - check_params(p, {"cusparse_handle"}); - } - - void get(boost::property_tree::ptree &p, const std::string &path) const { - AMGCL_PARAMS_EXPORT_VALUE(p, path, cusparse_handle); - } -#endif - }; - - static std::string name() { return "cuda"; } - - /// Copy matrix from builtin backend. - static std::shared_ptr - copy_matrix(std::shared_ptr< typename builtin::matrix > A, const params &prm) - { - return std::make_shared(rows(*A), cols(*A), - A->ptr, A->col, A->val, prm.cusparse_handle - ); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(typename builtin::vector const &x, const params&) - { - return std::make_shared(x.data(), x.data() + x.size()); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(std::shared_ptr< typename builtin::vector > x, const params &prm) - { - return copy_vector(*x, prm); - } - - /// Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params&) - { - return std::make_shared(size); - } - - /// Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr< typename builtin::matrix > A, const params &prm) - { - return std::make_shared(A, prm); - } - - struct gather { - thrust::device_vector I; - mutable thrust::device_vector T; - - gather(size_t src_size, const std::vector &I, const params&) - : I(I), T(I.size()) - { } - - void operator()(const vector &src, vector &dst) const { - thrust::gather(I.begin(), I.end(), src.begin(), dst.begin()); - } - - void operator()(const vector &vec, std::vector &vals) const { - thrust::gather(I.begin(), I.end(), vec.begin(), T.begin()); - thrust::copy(T.begin(), T.end(), vals.begin()); - } - }; - - struct scatter { - thrust::device_vector I; - - scatter(size_t size, const std::vector &I, const params &) - : I(I) - { } - - void operator()(const vector &src, vector &dst) const { - thrust::scatter(src.begin(), src.end(), I.begin(), dst.begin()); - } - }; -}; - -//--------------------------------------------------------------------------- -// Backend interface implementation -//--------------------------------------------------------------------------- -template < typename V > -struct bytes_impl< thrust::device_vector > { - static size_t get(const thrust::device_vector &v) { - return v.size() * sizeof(V); - } -}; - -template < typename Alpha, typename Beta, typename V > -struct spmv_impl< - Alpha, cuda_matrix, thrust::device_vector, - Beta, thrust::device_vector - > -{ - typedef cuda_matrix matrix; - typedef thrust::device_vector vector; - - static void apply(Alpha alpha, const matrix &A, const vector &x, - Beta beta, vector &y) - { - A.spmv(alpha, x, beta, y); - } -}; - -template < typename V > -struct residual_impl< - cuda_matrix, - thrust::device_vector, - thrust::device_vector, - thrust::device_vector - > -{ - typedef cuda_matrix matrix; - typedef thrust::device_vector vector; - - static void apply(const vector &rhs, const matrix &A, const vector &x, - vector &r) - { - thrust::copy(rhs.begin(), rhs.end(), r.begin()); - A.spmv(-1, x, 1, r); - } -}; - -template < typename V > -struct clear_impl< thrust::device_vector > -{ - typedef thrust::device_vector vector; - - static void apply(vector &x) - { - thrust::fill(x.begin(), x.end(), V()); - } -}; - -template -struct copy_impl > -{ - static void apply(const V &x, thrust::device_vector &y) - { - thrust::copy(x.begin(), x.end(), y.begin()); - } -}; - -template -struct copy_impl, V > -{ - static void apply(const thrust::device_vector &x, V &y) - { - thrust::copy(x.begin(), x.end(), y.begin()); - } -}; - -template -struct copy_impl, thrust::device_vector > -{ - static void apply(const thrust::device_vector &x, thrust::device_vector &y) - { - thrust::copy(x.begin(), x.end(), y.begin()); - } -}; - -template < typename V > -struct inner_product_impl< - thrust::device_vector, - thrust::device_vector - > -{ - typedef thrust::device_vector vector; - - static V get(const vector &x, const vector &y) - { - return thrust::inner_product(x.begin(), x.end(), y.begin(), V()); - } -}; - -template < typename A, typename B, typename V > -struct axpby_impl< - A, thrust::device_vector, - B, thrust::device_vector - > -{ - typedef thrust::device_vector vector; - - struct functor { - A a; - B b; - functor(A a, B b) : a(a), b(b) {} - - template - __host__ __device__ void operator()( Tuple t ) const { - using thrust::get; - - if (b) - get<1>(t) = a * get<0>(t) + b * get<1>(t); - else - get<1>(t) = a * get<0>(t); - } - }; - - static void apply(A a, const vector &x, B b, vector &y) - { - thrust::for_each( - thrust::make_zip_iterator( - thrust::make_tuple( - x.begin(), y.begin() - ) - ), - thrust::make_zip_iterator( - thrust::make_tuple( - x.end(), y.end() - ) - ), - functor(a, b) - ); - } -}; - -template < typename A, typename B, typename C, typename V > -struct axpbypcz_impl< - A, thrust::device_vector, - B, thrust::device_vector, - C, thrust::device_vector - > -{ - typedef thrust::device_vector vector; - - struct functor { - A a; - B b; - C c; - - functor(A a, B b, C c) : a(a), b(b), c(c) {} - - template - __host__ __device__ void operator()( Tuple t ) const { - using thrust::get; - - if (c) - get<2>(t) = a * get<0>(t) + b * get<1>(t) + c * get<2>(t); - else - get<2>(t) = a * get<0>(t) + b * get<1>(t); - } - }; - - static void apply( - A a, const vector &x, - B b, const vector &y, - C c, vector &z - ) - { - thrust::for_each( - thrust::make_zip_iterator( - thrust::make_tuple( - x.begin(), y.begin(), z.begin() - ) - ), - thrust::make_zip_iterator( - thrust::make_tuple( - x.end(), y.end(), z.end() - ) - ), - functor(a, b, c) - ); - } -}; - -template < typename A, typename B, typename V > -struct vmul_impl< - A, thrust::device_vector, thrust::device_vector, - B, thrust::device_vector - > -{ - typedef thrust::device_vector vector; - - struct functor { - A a; - B b; - functor(A a, B b) : a(a), b(b) {} - - template - __host__ __device__ void operator()( Tuple t ) const { - using thrust::get; - - if (b) - get<2>(t) = a * get<0>(t) * get<1>(t) + b * get<2>(t); - else - get<2>(t) = a * get<0>(t) * get<1>(t); - } - }; - - static void apply(A a, const vector &x, const vector &y, B b, vector &z) - { - thrust::for_each( - thrust::make_zip_iterator( - thrust::make_tuple( - x.begin(), y.begin(), z.begin() - ) - ), - thrust::make_zip_iterator( - thrust::make_tuple( - x.end(), y.end(), z.end() - ) - ), - functor(a, b) - ); - } -}; - -class cuda_event { - public: - cuda_event() : e(create_event(), backend::detail::cuda_deleter()) { } - - float operator-(cuda_event tic) const { - float delta; - cudaEventSynchronize(e.get()); - cudaEventElapsedTime(&delta, tic.e.get(), e.get()); - return delta / 1000.0f; - } - private: - std::shared_ptr::type> e; - - static cudaEvent_t create_event() { - cudaEvent_t e; - cudaEventCreate(&e); - cudaEventRecord(e, 0); - return e; - } -}; - -struct cuda_clock { - typedef cuda_event value_type; - - static const char* units() { return "s"; } - - cuda_event current() const { - return cuda_event(); - } -}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/detail/default_direct_solver.hpp b/src/solvers/amgcl/backend/detail/default_direct_solver.hpp deleted file mode 100644 index 24cd00b..0000000 --- a/src/solvers/amgcl/backend/detail/default_direct_solver.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef AMGCL_BACKEND_DETAIL_DEFAULT_DIRECT_SOLVER_HPP -#define AMGCL_BACKEND_DETAIL_DEFAULT_DIRECT_SOLVER_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/detail/default_direct_solver.hpp - * \author Denis Demidov - * \brief Default direct solver for coarse level. - */ - -#include -#include - -namespace amgcl { -namespace backend { -namespace detail { - -template -struct default_direct_solver { - typedef typename Backend::value_type real; - typedef typename math::scalar_of::type scalar; - typedef typename Backend::matrix matrix; - typedef typename builtin::matrix host_matrix; - - std::shared_ptr Ainv; - - default_direct_solver( - std::shared_ptr A, - typename Backend::params const &prm - ) - { - auto ainv = std::make_shared(); - *ainv = inverse(*A); - Ainv = Backend::copy_matrix(ainv, prm); - } - - template - void operator()(const Vec1 &rhs, Vec2 &x) const { - backend::spmv(math::identity(), *Ainv, rhs, math::zero(), x); - } - - static size_t coarse_enough() { return 500; } -}; - -} // namespace detail -} // namespace backend -} // namespace amgcl - - - -#endif diff --git a/src/solvers/amgcl/backend/detail/matrix_ops.hpp b/src/solvers/amgcl/backend/detail/matrix_ops.hpp deleted file mode 100644 index 5bbe3d9..0000000 --- a/src/solvers/amgcl/backend/detail/matrix_ops.hpp +++ /dev/null @@ -1,174 +0,0 @@ -#ifndef AMGCL_BACKEND_DETAIL_MATRIX_OPS_HPP -#define AMGCL_BACKEND_DETAIL_MATRIX_OPS_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/adapter/detail/matrix_ops.hpp - * \author Denis Demidov - * \brief Sparse matrix operations for matrices that provide row_iterator. - */ - -#include -#include -#include - -namespace amgcl { -namespace backend { -namespace detail { - -template -struct use_builtin_matrix_ops : std::false_type {}; - -} // namespace detail - -template -struct spmv_impl< - Alpha, Matrix, Vector1, Beta, Vector2, - typename std::enable_if< - detail::use_builtin_matrix_ops::value && - math::static_rows::type>::value == math::static_rows::type>::value && - math::static_rows::type>::value == math::static_rows::type>::value - >::type - > -{ - static void apply( - Alpha alpha, const Matrix &A, const Vector1 &x, Beta beta, Vector2 &y - ) - { - typedef typename value_type::type V; - - const ptrdiff_t n = static_cast( rows(A) ); - - if (!math::is_zero(beta)) { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) { - V sum = math::zero(); - for(typename row_iterator::type a = row_begin(A, i); a; ++a) - sum += a.value() * x[ a.col() ]; - y[i] = alpha * sum + beta * y[i]; - } - } else { -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) { - V sum = math::zero(); - for(typename row_iterator::type a = row_begin(A, i); a; ++a) - sum += a.value() * x[ a.col() ]; - y[i] = alpha * sum; - } - } - } -}; - -template -struct residual_impl< - Matrix, Vector1, Vector2, Vector3, - typename std::enable_if< - detail::use_builtin_matrix_ops::value && - math::static_rows::type>::value == math::static_rows::type>::value && - math::static_rows::type>::value == math::static_rows::type>::value && - math::static_rows::type>::value == math::static_rows::type>::value - >::type - > -{ - static void apply( - Vector1 const &rhs, - Matrix const &A, - Vector2 const &x, - Vector3 &res - ) - { - typedef typename value_type::type V; - - const ptrdiff_t n = static_cast( rows(A) ); - -#pragma omp parallel for - for(ptrdiff_t i = 0; i < n; ++i) { - V sum = math::zero(); - for(typename row_iterator::type a = row_begin(A, i); a; ++a) - sum += a.value() * x[ a.col() ]; - res[i] = rhs[i] - sum; - } - } -}; - -/* Allows to do matrix-vector products with mixed scalar/nonscalar types. - * Reinterprets pointers to the vectors data into appropriate types. - */ -template -struct spmv_impl< - Alpha, Matrix, Vector1, Beta, Vector2, - typename std::enable_if< - detail::use_builtin_matrix_ops::value && ( - math::static_rows::type>::value != math::static_rows::type>::value || - math::static_rows::type>::value != math::static_rows::type>::value) - >::type - > -{ - static void apply( - Alpha alpha, const Matrix &A, const Vector1 &x, Beta beta, Vector2 &y - ) - { - typedef typename value_type::type V; - - auto X = backend::reinterpret_as_rhs(x); - auto Y = backend::reinterpret_as_rhs(y); - - spmv(alpha, A, X, beta, Y); - } -}; - -template -struct residual_impl< - Matrix, Vector1, Vector2, Vector3, - typename std::enable_if< - detail::use_builtin_matrix_ops::value && ( - math::static_rows::type>::value != math::static_rows::type>::value || - math::static_rows::type>::value != math::static_rows::type>::value || - math::static_rows::type>::value != math::static_rows::type>::value) - >::type - > -{ - static void apply( - Vector1 const &f, - Matrix const &A, - Vector2 const &x, - Vector3 &r - ) - { - typedef typename value_type::type V; - - auto X = backend::reinterpret_as_rhs(x); - auto F = backend::reinterpret_as_rhs(f); - auto R = backend::reinterpret_as_rhs(r); - - residual(F, A, X, R); - } -}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/detail/mixing.hpp b/src/solvers/amgcl/backend/detail/mixing.hpp deleted file mode 100644 index 8b9a473..0000000 --- a/src/solvers/amgcl/backend/detail/mixing.hpp +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef AMGCL_BACKEND_DETAIL_MIXING_HPP -#define AMGCL_BACKEND_DETAIL_MIXING_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov -Copyright (c) 2016, Riccardo Rossi, CIMNE (International Center for Numerical Methods in Engineering) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/detail/mixing.hpp - * \author Denis Demidov - * \brief Utilities for mixed-precision of mixed-block backends. - */ - -#include -#include -#include - -namespace amgcl { -namespace backend { -namespace detail { - -// Backend with scalar value_type of highest precision. - -template -struct common_scalar_backend; - -template -struct common_scalar_backend::value == 1 - >::type > -{ - typedef B type; -}; - -template -struct common_scalar_backend< backend::builtin, backend::builtin, - typename std::enable_if< - math::static_rows::value != 1 || - math::static_rows::value != 1 - >::type> -{ - typedef typename math::scalar_of::type S1; - typedef typename math::scalar_of::type S2; - - typedef - typename std::conditional< - (sizeof(S1) > sizeof(S2)), backend::builtin, backend::builtin - >::type - type; -}; - -} // namespace detail -} // namespace backend -} // namespace amgcl - - - -#endif diff --git a/src/solvers/amgcl/backend/eigen.hpp b/src/solvers/amgcl/backend/eigen.hpp deleted file mode 100644 index a4c26ff..0000000 --- a/src/solvers/amgcl/backend/eigen.hpp +++ /dev/null @@ -1,277 +0,0 @@ -#ifndef AMGCL_BACKEND_EIGEN_HPP -#define AMGCL_BACKEND_EIGEN_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/eigen.hpp - * \author Denis Demidov - * \brief Sparse matrix in CRS format. - */ - -#include -#include -#include - -namespace amgcl { -namespace backend { - -/// Eigen backend. -/** - * This is a backend that uses types defined in the Eigen library - * (http://eigen.tuxfamily.org). - * - * \param real Value type. - * \ingroup backends - */ -template -struct eigen { - typedef real value_type; - typedef ptrdiff_t index_type; - typedef ptrdiff_t col_type; - typedef ptrdiff_t ptr_type; - - typedef - Eigen::Map> - matrix; - - typedef Eigen::Matrix vector; - typedef Eigen::Matrix matrix_diagonal; - - typedef solver::skyline_lu direct_solver; - - struct provides_row_iterator : std::true_type {}; - - /// Backend parameters. - typedef amgcl::detail::empty_params params; - - static std::string name() { return "eigen"; } - - /// Copy matrix from builtin backend. - static std::shared_ptr - copy_matrix(std::shared_ptr< typename builtin::matrix > A, const params&) - { - const typename builtin::matrix &a = *A; - - return std::shared_ptr( - new matrix( - rows(*A), cols(*A), nonzeros(*A), - const_cast(a.ptr), - const_cast(a.col), - const_cast(a.val) - ), - hold_host(A) - ); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(typename builtin::vector const &x, const params&) - { - return std::make_shared( - Eigen::Map(x.data(), x.size()) - ); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(std::shared_ptr< typename builtin::vector > x, const params &prm) - { - return copy_vector(*x, prm); - } - - /// Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params&) - { - return std::make_shared(size); - } - - /// Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr< typename builtin::matrix > A, const params&) - { - return std::make_shared(*A); - } - - private: - struct hold_host { - typedef std::shared_ptr< crs > host_matrix; - host_matrix host; - - hold_host( host_matrix host ) : host(host) {} - - void operator()(matrix *ptr) const { - delete ptr; - } - }; - -}; - -template < class Alpha, class M, class V1, class Beta, class V2 > -struct spmv_impl< - Alpha, M, V1, Beta, V2, - typename std::enable_if< - is_eigen_sparse_matrix::value && - is_eigen_type::value && - is_eigen_type::value - >::type - > -{ - static void apply(Alpha alpha, const M &A, const V1 &x, Beta beta, V2 &y) - { - if (!math::is_zero(beta)) - y = alpha * A * x + beta * y; - else - y = alpha * A * x; - } -}; - -template < class M, class V1, class V2, class V3 > -struct residual_impl< - M, V1, V2, V3, - typename std::enable_if< - is_eigen_sparse_matrix::value && - is_eigen_type::value && - is_eigen_type::value && - is_eigen_type::value - >::type - > -{ - static void apply(const V1 &rhs, const M &A, const V2 &x, V3 &r) - { - r = rhs - A * x; - } -}; - -template < typename V > -struct clear_impl< - V, - typename std::enable_if::value>::type - > -{ - static void apply(V &x) - { - x.setZero(); - } -}; - -template < class V1, class V2 > -struct inner_product_impl< - V1, V2, - typename std::enable_if< - is_eigen_type::value && - is_eigen_type::value - >::type - > -{ - typedef typename value_type::type real; - static real get(const V1 &x, const V2 &y) - { - return x.dot(y); - } -}; - -template < class A, class V1, class B, class V2 > -struct axpby_impl< - A, V1, B, V2, - typename std::enable_if< - is_eigen_type::value && - is_eigen_type::value - >::type - > -{ - static void apply(A a, const V1 &x, B b, V2 &y) - { - if (!math::is_zero(b)) - y = a * x + b * y; - else - y = a * x; - } -}; - -template < class A, class V1, class B, class V2, class C, class V3 > -struct axpbypcz_impl< - A, V1, B, V2, C, V3, - typename std::enable_if< - is_eigen_type::value && - is_eigen_type::value && - is_eigen_type::value - >::type - > -{ - typedef typename value_type::type real; - - static void apply( - real a, const V1 &x, - real b, const V2 &y, - real c, V3 &z - ) - { - if (!math::is_zero(c)) - z = a * x + b * y + c * z; - else - z = a * x + b * y; - } -}; - -template < class Alpha, class V1, class V2, class Beta, class V3 > -struct vmul_impl< - Alpha, V1, V2, Beta, V3, - typename std::enable_if< - is_eigen_type::value && - is_eigen_type::value && - is_eigen_type::value - >::type - > -{ - static void apply(Alpha a, const V1 &x, const V2 &y, Beta b, V3 &z) - { - if (!math::is_zero(b)) - z.array() = a * x.array() * y.array() + b * z.array(); - else - z.array() = a * x.array() * y.array(); - } -}; - -template < class V1, class V2 > -struct copy_impl< - V1, V2, - typename std::enable_if< - is_eigen_type::value && - is_eigen_type::value - >::type - > -{ - static void apply(const V1 &x, V2 &y) - { - y = x; - } -}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/hpx.hpp b/src/solvers/amgcl/backend/hpx.hpp deleted file mode 100644 index 8812757..0000000 --- a/src/solvers/amgcl/backend/hpx.hpp +++ /dev/null @@ -1,966 +0,0 @@ -#ifndef AMGCL_BACKEND_HPX_HPP -#define AMGCL_BACKEND_HPX_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/hpx.hpp - * \author Denis Demidov - * \brief HPX backend. - */ - -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -namespace amgcl { -namespace backend { - -/// The matrix is a thin wrapper on top of amgcl::builtin::crs<>. -template -class hpx_matrix { - public: - typedef real value_type; - typedef ptrdiff_t index_type; - - typedef crs Base; - typedef typename Base::row_iterator row_iterator; - - // For each of the output segments y[i] in y = A * x it stores a range of - // segments in x that y[i] depends on. - std::vector> xrange; - - // And yrange stores the inverted dependencies: for each segment in x - // yrange stores range of segments in y that depend on x. This is required - // to determine when a segment of x in [y = A * x] is safe to update. - std::vector> yrange; - - // Creates the matrix from builtin datatype, sets up xrange. - hpx_matrix(std::shared_ptr A, int grain_size) : base(A) - { - index_type n = backend::rows(*A); - index_type m = backend::cols(*A); - - index_type nseg = (n + grain_size - 1) / grain_size; - index_type mseg = (m + grain_size - 1) / grain_size; - - xrange.resize(nseg); - yrange.resize(mseg, std::make_tuple(m, 0)); - - auto range = boost::irange(0, nseg); - - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [this, grain_size, n, A](index_type seg) { - index_type i = seg * grain_size; - index_type beg = A->ptr[i]; - index_type end = A->ptr[std::min(i + grain_size, n)]; - - auto mm = std::minmax_element(A->col + beg, A->col + end); - - index_type xbeg = *std::get<0>(mm) / grain_size; - index_type xend = *std::get<1>(mm) / grain_size + 1; - - xrange[seg] = std::make_tuple(xbeg, xend); - - for(index_type i = xbeg; i < xend; ++i) { - std::get<0>(yrange[i]) = std::min(seg, std::get<0>(yrange[i])); - std::get<1>(yrange[i]) = std::max(seg+1, std::get<1>(yrange[i])); - } - }); - } - - size_t rows() const { return backend::rows(*base); } - size_t cols() const { return backend::cols(*base); } - size_t nonzeros() const { return backend::nonzeros(*base); } - - row_iterator row_begin(size_t row) const { - return base->row_begin(row); - } - private: - // Base matrix is stored in shared_ptr<> to reduce the overhead - // of data transfer from builtin datatypes (used for AMG setup) to the - // backend datatypes. - std::shared_ptr base; - -}; - -/// The vector type to be used with HPX backend. -/** - * hpx_vector is a thin wrapper on top of std::vector. - * The vector consists of continuous segments of fixed size (grain_size) except - * may be the last one that is allowed to be shorter. - * A vector of shared_futures corresponding to each of the segments is stored - * along the data vector to facilitate construction of HPX dependency graph. - */ -template < typename real > -class hpx_vector { - public: - typedef real value_type; - typedef std::vector Base; - typedef typename Base::iterator iterator; - typedef typename Base::const_iterator const_iterator; - - int nseg; // Number of segments in the vector - int grain_size; // Segment size. - - // Futures associated with each segment: - mutable std::vector> safe_to_read; - mutable std::vector> safe_to_write; - - hpx_vector(size_t n, int grain_size) - : nseg( (n + grain_size - 1) / grain_size ), - grain_size( grain_size ), - buf( std::make_shared(n) ) - { - precondition(grain_size > 0, "grain size should be positive"); - init_futures(); - } - - template - hpx_vector(std::shared_ptr o, int grain_size) - : nseg( (o->size() + grain_size - 1) / grain_size ), - grain_size( grain_size ), - buf(std::make_shared(o->data(), o->data() + o->size())) - { - precondition(grain_size > 0, "grain size should be positive"); - init_futures(); - } - - size_t size() const { return buf->size(); } - - const real & operator[](size_t i) const { return (*buf)[i]; } - real & operator[](size_t i) { return (*buf)[i]; } - - const real* data() const { return buf->data(); } - real* data() { return buf->data(); } - - iterator begin() { return buf->begin(); } - iterator end() { return buf->end(); } - - const_iterator begin() const { return buf->cbegin(); } - const_iterator end() const { return buf->cend(); } - - const_iterator cbegin() const { return buf->cbegin(); } - const_iterator cend() const { return buf->cend(); } - - template - boost::iterator_range< - typename std::vector< hpx::shared_future >::iterator - > - safe_range(IdxTuple idx) const { - return boost::make_iterator_range( - safe_to_read.begin() + std::get<0>(idx), - safe_to_read.begin() + std::get<1>(idx) - ); - } - private: - // Segments stored in a continuous array. - // The base vector is stored with shared_ptr for the same reason as with - // hpx_matrix above: to reduce the overhead of data transfer. - std::shared_ptr buf; - - void init_futures() { - safe_to_read.reserve(nseg); - safe_to_write.reserve(nseg); - for(ptrdiff_t i = 0; i < nseg; ++i) { - safe_to_read.push_back(hpx::make_ready_future()); - safe_to_write.push_back(hpx::make_ready_future()); - } - } -}; - -/// HPX backend -/** - * This is a backend that is based on HPX -- a general purpose C++ runtime - * system for parallel and distributed applications of any scale - * http://stellar-group.org/libraries/hpx. - */ -template -struct HPX { - typedef real value_type; - typedef ptrdiff_t index_type; - typedef ptrdiff_t col_type; - typedef ptrdiff_t ptr_type; - - struct provides_row_iterator : std::false_type {}; - - struct params { - /// Number of vector elements in a single segment. - int grain_size; - - params() : grain_size(4096) {} - -#ifndef AMGCL_NO_BOOST - params(const boost::property_tree::ptree &p) - : AMGCL_PARAMS_IMPORT_VALUE(p, grain_size) - { - check_params(p, {"grain_size"}); - } - - void get(boost::property_tree::ptree &p, const std::string &path) const { - AMGCL_PARAMS_EXPORT_VALUE(p, path, grain_size); - } -#endif - }; - - typedef hpx_matrix matrix; - typedef hpx_vector vector; - typedef hpx_vector matrix_diagonal; - - struct direct_solver : public solver::skyline_lu { - typedef solver::skyline_lu Base; - typedef typename Base::params params; - - template - direct_solver(const Matrix &A, const params &prm = params()) - : Base(A, prm) - {} - - struct call_base { - const Base *base; - const real *fptr; - real *xptr; - - template - void operator()(T&&...) const { - (*base)(fptr, xptr); - } - }; - - void operator()(const vector &rhs, vector &x) const { - const real *fptr = &rhs[0]; - real *xptr = &x[0]; - - using hpx::dataflow; - - hpx::shared_future solve = dataflow( - hpx::launch::async, - call_base{this, fptr, xptr}, - rhs.safe_to_read, - x.safe_to_write - ); - - boost::fill(x.safe_to_read, solve); - } - }; - - static std::string name() { return "HPX"; } - - /// Copy matrix. - static std::shared_ptr - copy_matrix(std::shared_ptr A, const params &p) - { - return std::make_shared(A, p.grain_size); - } - - /// Copy vector to builtin backend. - static std::shared_ptr - copy_vector(const typename vector::Base &x, const params &p) - { - return std::make_shared( - std::make_shared(x), p.grain_size - ); - } - - /// Copy vector to builtin backend. - template - static std::shared_ptr> - copy_vector(std::shared_ptr x, const params &p) - { - return std::make_shared>(x, p.grain_size); - } - - /// Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params &p) - { - return std::make_shared(size, p.grain_size); - } - - /// Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr A, const params&) { - return std::make_shared(*A); - } -}; - -//--------------------------------------------------------------------------- -// Backend interface implementation -//--------------------------------------------------------------------------- -template < typename Alpha, typename Beta, typename real > -struct spmv_impl< - Alpha, hpx_matrix, hpx_vector, - Beta, hpx_vector - > -{ - typedef hpx_matrix matrix; - typedef hpx_vector vector; - - struct process_ab { - Alpha alpha; - const hpx_matrix &A; - const real *xptr; - Beta beta; - real *yptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) { - real sum = 0; - for(auto a = A.row_begin(i); a; ++a) - sum += a.value() * xptr[a.col()]; - yptr[i] = alpha * sum + beta * yptr[i]; - } - } - }; - - struct process_a { - Alpha alpha; - const hpx_matrix &A; - const real *xptr; - real *yptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) { - real sum = 0; - for(auto a = A.row_begin(i); a; ++a) - sum += a.value() * xptr[a.col()]; - yptr[i] = alpha * sum; - } - } - }; - - struct wait_for_it { - template - void operator()(T&&) const {} - }; - - static void apply(Alpha alpha, const matrix &A, const vector &x, - Beta beta, vector &y) - { - const real *xptr = &x[0]; - real *yptr = &y[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, y.nseg); - - if (beta) { - // y = alpha * A * x + beta * y - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [alpha, &A, &x, beta, &y, xptr, yptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * y.grain_size; - ptrdiff_t end = std::min(beg + y.grain_size, y.size()); - - y.safe_to_read[seg] = dataflow( - hpx::launch::async, - process_ab{alpha, A, xptr, beta, yptr, beg, end}, - y.safe_to_read[seg], - y.safe_to_write[seg], - x.safe_range(A.xrange[seg]) - ); - }); - } else { - // y = alpha * A * x - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [alpha, &A, &x, &y, xptr, yptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * y.grain_size; - ptrdiff_t end = std::min(beg + y.grain_size, y.size()); - - y.safe_to_read[seg] = dataflow(hpx::launch::async, - process_a{alpha, A, xptr, yptr, beg, end}, - y.safe_to_write[seg], - x.safe_range(A.xrange[seg]) - ); - }); - } - - // Do not update x until y is ready. - range = boost::irange(0, x.nseg); - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [&A, &x, &y](ptrdiff_t seg) { - x.safe_to_write[seg] = dataflow(hpx::launch::async, - wait_for_it(), - y.safe_range(A.yrange[seg]) - ); - }); - } -}; - -template < typename real > -struct residual_impl< - hpx_matrix, - hpx_vector, - hpx_vector, - hpx_vector - > -{ - typedef hpx_matrix matrix; - typedef hpx_vector vector; - - struct process { - const real *fptr; - const hpx_matrix &A; - const real *xptr; - real *rptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) { - real sum = fptr[i]; - for(auto a = A.row_begin(i); a; ++a) - sum -= a.value() * xptr[a.col()]; - rptr[i] = sum; - } - } - }; - - struct wait_for_it { - template - void operator()(T&&) const {} - }; - - static void apply(const vector &f, const matrix &A, const vector &x, - vector &r) - { - const real *xptr = &x[0]; - const real *fptr = &f[0]; - real *rptr = &r[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, f.nseg); - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [&f, &A, &x, &r, xptr, fptr, rptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * f.grain_size; - ptrdiff_t end = std::min(beg + f.grain_size, f.size()); - - r.safe_to_read[seg] = dataflow(hpx::launch::async, - process{fptr, A, xptr, rptr, beg, end}, - f.safe_to_read[seg], - r.safe_to_write[seg], - x.safe_range(A.xrange[seg]) - ); - }); - - // Do not update x until r is ready. - range = boost::irange(0, x.nseg); - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [&A, &x, &r](ptrdiff_t seg) { - x.safe_to_write[seg] = dataflow(hpx::launch::async, - wait_for_it(), - r.safe_range(A.yrange[seg]) - ); - }); - } -}; - -template < typename real > -struct clear_impl< - hpx_vector - > -{ - typedef hpx_vector vector; - - struct process { - real *xptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - xptr[i] = 0; - } - }; - - static void apply(vector &x) { - real *xptr = &x[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, x.nseg); - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [&x, xptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - x.safe_to_read[seg] = dataflow(hpx::launch::async, - process{xptr, beg, end}, - x.safe_to_write[seg] - ); - }); - } -}; - -template < typename real > -struct copy_impl< - hpx_vector, - hpx_vector - > -{ - typedef hpx_vector vector; - - struct process { - const real *xptr; - real *yptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - yptr[i] = xptr[i]; - } - }; - - static void apply(const vector &x, vector &y) - { - const real *xptr = &x[0]; - real *yptr = &y[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, x.nseg); - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [&x, &y, xptr, yptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - y.safe_to_read[seg] = dataflow(hpx::launch::async, - process{xptr, yptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_write[seg] - ); - }); - } -}; - -template < typename real > -struct copy< - std::vector, - hpx_vector - > -{ - typedef hpx_vector vector; - - struct process { - const real *xptr; - real *yptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - yptr[i] = xptr[i]; - } - }; - - static void apply(const std::vector &x, vector &y) - { - const real *xptr = &x[0]; - real *yptr = &y[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, y.nseg); - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [&y, xptr, yptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * y.grain_size; - ptrdiff_t end = std::min(beg + y.grain_size, y.size()); - - y.safe_to_read[seg] = dataflow(hpx::launch::async, - process{xptr, yptr, beg, end}, - y.safe_to_write[seg] - ); - }); - } -}; - -template < typename real > -struct inner_product_impl< - hpx_vector, - hpx_vector - > -{ - typedef hpx_vector vector; - - struct process { - const real *xptr; - const real *yptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - double operator()(T&&...) const { - real sum = 0; - - for(ptrdiff_t i = beg; i < end; ++i) - sum += xptr[i] * yptr[i]; - - return sum; - } - }; - - static real get(const vector &x, const vector &y) - { - const real *xptr = &x[0]; - const real *yptr = &y[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, x.nseg); - return hpx::parallel::transform_reduce( - hpx::parallel::par, - boost::begin(range), boost::end(range), - math::zero(), std::plus(), - [&x, &y, xptr, yptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - return dataflow(hpx::launch::async, - process{xptr, yptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_read[seg] - ).get(); - } - ); - } -}; - -template < typename A, typename B, typename real > -struct axpby_impl< - A, hpx_vector, - B, hpx_vector - > -{ - typedef hpx_vector vector; - - struct process_ab { - typedef void result_type; - - A a; - const real *xptr; - B b; - real *yptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - yptr[i] = a * xptr[i] + b * yptr[i]; - } - }; - - struct process_a { - typedef void result_type; - - A a; - const real *xptr; - real *yptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - yptr[i] = a * xptr[i]; - } - }; - - static void apply(A a, const vector &x, B b, vector &y) - { - const real *xptr = &x[0]; - real *yptr = &y[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, x.nseg); - if (b) { - // y = a * x + b * y; - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [a, &x, b, &y, xptr, yptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - y.safe_to_read[seg] = dataflow(hpx::launch::async, - process_ab{a, xptr, b, yptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_read[seg], - y.safe_to_write[seg] - ); - }); - } else { - // y = a * x; - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [a, &x, &y, xptr, yptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - y.safe_to_read[seg] = dataflow(hpx::launch::async, - process_a{a, xptr, yptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_write[seg] - ); - }); - } - } -}; - -template < typename A, typename B, typename C, typename real > -struct axpbypcz_impl< - A, hpx_vector, - B, hpx_vector, - C, hpx_vector - > -{ - typedef hpx_vector vector; - - struct process_abc { - A a; - const real *xptr; - B b; - const real *yptr; - C c; - real *zptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - zptr[i] = a * xptr[i] + b * yptr[i] + c * zptr[i]; - } - }; - - struct process_ab { - A a; - const real *xptr; - B b; - const real *yptr; - real *zptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - zptr[i] = a * xptr[i] + b * yptr[i]; - } - }; - - static void apply( - A a, const vector &x, - B b, const vector &y, - C c, vector &z - ) - { - const real *xptr = &x[0]; - const real *yptr = &y[0]; - real *zptr = &z[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, x.nseg); - if (c) { - //z = a * x + b * y + c * z; - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [a, &x, b, &y, c, &z, xptr, yptr, zptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - z.safe_to_read[seg] = dataflow(hpx::launch::async, - process_abc{a, xptr, b, yptr, c, zptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_read[seg], - z.safe_to_read[seg], - z.safe_to_write[seg] - ); - }); - } else { - //z = a * x + b * y; - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [a, &x, b, &y, &z, xptr, yptr, zptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - z.safe_to_read[seg] = dataflow(hpx::launch::async, - process_ab{a, xptr, b, yptr, zptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_read[seg], - z.safe_to_write[seg] - ); - }); - } - } -}; - -template < typename A, typename B, typename real > -struct vmul_impl< - A, hpx_vector, hpx_vector, - B, hpx_vector - > -{ - typedef hpx_vector vector; - - struct process_ab { - A a; - const real *xptr; - const real *yptr; - B b; - real *zptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - zptr[i] = a * xptr[i] * yptr[i] + b * zptr[i]; - } - }; - - struct process_a { - A a; - const real *xptr; - const real *yptr; - real *zptr; - - ptrdiff_t beg; - ptrdiff_t end; - - template - void operator()(T&&...) const { - for(ptrdiff_t i = beg; i < end; ++i) - zptr[i] = a * xptr[i] * yptr[i]; - } - }; - - static void apply(A a, const vector &x, const vector &y, B b, vector &z) - { - const real *xptr = &x[0]; - const real *yptr = &y[0]; - real *zptr = &z[0]; - - using hpx::dataflow; - - auto range = boost::irange(0, x.nseg); - if (b) { - //z = a * x * y + b * z; - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [a, &x, &y, b, &z, xptr, yptr, zptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - z.safe_to_read[seg] = dataflow(hpx::launch::async, - process_ab{a, xptr, yptr, b, zptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_read[seg], - z.safe_to_read[seg], - z.safe_to_write[seg] - ); - }); - } else { - //z = a * x * y; - hpx::parallel::for_each( - hpx::parallel::par, - boost::begin(range), boost::end(range), - [a, &x, &y, &z, xptr, yptr, zptr](ptrdiff_t seg) { - ptrdiff_t beg = seg * x.grain_size; - ptrdiff_t end = std::min(beg + x.grain_size, x.size()); - - z.safe_to_read[seg] = dataflow(hpx::launch::async, - process_a{a, xptr, yptr, zptr, beg, end}, - x.safe_to_read[seg], - y.safe_to_read[seg], - z.safe_to_write[seg] - ); - }); - } - } -}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/interface.hpp b/src/solvers/amgcl/backend/interface.hpp deleted file mode 100644 index c3074c4..0000000 --- a/src/solvers/amgcl/backend/interface.hpp +++ /dev/null @@ -1,449 +0,0 @@ -#ifndef AMGCL_BACKEND_INTERFACE_HPP -#define AMGCL_BACKEND_INTERFACE_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/interface.hpp - * \author Denis Demidov - * \brief Backend interface required for AMG. - */ - -#include - -#include - -#include -#include - -namespace amgcl { - -/// Provided backends. -namespace backend { - -/** - * \defgroup backends Provided backends - * \brief Backends implemented in AMGCL. - * - * A backend in AMGCL is a class that defines matrix and vector types together - * with several operations on them, such as creation, matrix-vector products, - * vector sums, inner products etc. The AMG hierarchy is moved to the - * specified backend upon construction. The solution phase then uses types and - * operations defined in the backend. This enables transparent acceleration of - * the solution phase with OpenMP, OpenCL, CUDA, or any other technologies. - */ - -/** - * \defgroup backend_interface Backend interface - * \brief Backend interface specification. - * - * One has to specify these templates in order to define a new backend. - */ - -/** \addtogroup backend_interface - * @{ - */ - -/// Metafunction that checks if two backends are compatible. -/** - * That is, a solver in SBackend may be used together with a preconditioner in PBackend. - */ -template -struct backends_compatible : std::is_same {}; - -/// Metafunction that returns value type of a matrix or a vector type. -template -struct value_type { - typedef typename T::value_type type; -}; - -/// Metafunction that returns column type of a matrix. -template -struct col_type { - typedef typename T::col_type type; -}; - -/// Metafunction that returns pointer type of a matrix. -template -struct ptr_type { - typedef typename T::ptr_type type; -}; - -/// Implementation for function returning the number of rows in a matrix. -/** \note Used in rows() */ -template -struct rows_impl { - static size_t get(const Matrix &A) { - return A.rows(); - } -}; - -/// Implementation for function returning the number of columns in a matrix. -/** \note Used in cols() */ -template -struct cols_impl { - static size_t get(const Matrix &A) { - return A.cols(); - } -}; - -/// Implementation for function returning number of bytes allocated for a matrix/vector. -/** \note Used in bytes() */ -template -struct bytes_impl { - - // Use bytes() method when available. - template - static auto get_impl(const U &t, int) -> decltype(t.bytes()) { - return t.bytes(); - } - - // Fallback to zero. - template - static size_t get_impl(const U&, ...) { - return 0; - } - - static size_t get(const T &t) { - return get_impl(t, 0); - } -}; - -template -struct ptr_data_impl { - typedef typename Matrix::PTR_DATA_NOT_IMPLEMENTED type; -}; - -template -struct col_data_impl { - typedef typename Matrix::COL_DATA_NOT_IMPLEMENTED type; -}; - -template -struct val_data_impl { - typedef typename Matrix::VAL_DATA_NOT_IMPLEMENTED type; -}; - -/// Implementation for function returning the number of nonzeros in a matrix. -/** \note Used in nonzeros() */ -template -struct nonzeros_impl { - static size_t get(const Matrix &A) { - return A.nonzeros(); - } -}; - -/// Implementation for function returning the number of nonzeros in a matrix row. -/** \note Used in row_nonzeros() */ -template -struct row_nonzeros_impl { - typedef typename Matrix::ROW_NONZEROS_NOT_IMPLEMENTED type; -}; - -/// Metafunction returning the row iterator type for a matrix type. -/** - * \note This only has to be implemented in the backend if support for serial - * smoothers (Gauss-Seidel or ILU0) is required. - */ -template -struct row_iterator { - typedef typename Matrix::row_iterator type; -}; - -/// Implementation for function returning row iterator for a matrix. -/** - * \note This only has to be implemented in the backend if support for serial - * smoothers (Gauss-Seidel or ILU0) is required. - * \note Used in row_begin() - */ -template -struct row_begin_impl { - static typename row_iterator::type - get(const Matrix &A, size_t row) { - return A.row_begin(row); - } -}; - -/// Implementation for matrix-vector product. -/** \note Used in spmv() */ -template -struct spmv_impl { - typedef typename Matrix::SPMV_NOT_IMPLEMENTED type; -}; - -/// Implementation for residual error compuatation. -/** \note Used in residual() */ -template -struct residual_impl { - typedef typename Matrix::RESIDUAL_NOT_IMPLEMENTED type; -}; - -/// Implementation for zeroing out a vector. -/** \note Used in clear() */ -template -struct clear_impl { - typedef typename Vector::CLEAR_NOT_IMPLEMENTED type; -}; - -/// Implementation for vector copy. -/** \note Used in copy() */ -template -struct copy_impl { - typedef typename Vector1::COPY_NOT_IMPLEMENTED type; -}; - -/// Implementation for inner product. -/** \note Used in inner_product() */ -template -struct inner_product_impl { - typedef typename Vector1::INNER_PRODUCT_NOT_IMPLEMENTED type; -}; - -/// Implementation for linear combination of two vectors. -/** \note Used in axpby() */ -template -struct axpby_impl { - typedef typename Vector1::AXPBY_NOT_IMPLEMENTED type; -}; - -/// Implementation for linear combination of three vectors. -/** \note Used in axpbypcz() */ -template -struct axpbypcz_impl { - typedef typename Vector1::AXPBYPCZ_NOT_IMPLEMENTED type; -}; - -/// Implementation for element-wize vector product. -/** \note Used in vmul() */ -template -struct vmul_impl { - typedef typename Vector1::VMUL_NOT_IMPLEMENTED type; -}; - -/// Reinterpret the vector to be compatible with the matrix value type -template -struct reinterpret_as_rhs_impl { - typedef typename MatrixValue::REINTERPRET_AS_RHS_NOT_IMPLEMENTED type; -}; - -/** @} */ - -/// Returns the number of rows in a matrix. -template -size_t rows(const Matrix &matrix) { - return rows_impl::get(matrix); -} - -/// Returns the number of columns in a matrix. -template -size_t cols(const Matrix &matrix) { - return cols_impl::get(matrix); -} - -/// Returns number of bytes allocated for the container (matrix / vector) -template -size_t bytes(const T &t) { - return bytes_impl::get(t); -} - -template -typename ptr_data_impl::type -ptr_data(const Matrix &matrix) { - return ptr_data_impl::get(matrix); -} - -template -typename col_data_impl::type -col_data(const Matrix &matrix) { - return col_data_impl::get(matrix); -} - -template -typename val_data_impl::type -val_data(const Matrix &matrix) { - return val_data_impl::get(matrix); -} - -/// Returns the number of nonzeros in a matrix. -template -size_t nonzeros(const Matrix &matrix) { - return nonzeros_impl::get(matrix); -} - -/// Returns row iterator for a matrix. -template -typename row_iterator::type -row_begin(const Matrix &matrix, size_t row) { - return row_begin_impl::get(matrix, row); -} - -/// Returns number of nonzeros in a matrix row. -template -size_t row_nonzeros(const Matrix &A, size_t row) { - return row_nonzeros_impl::get(A, row); -} - -/// Performs matrix-vector product. -/** - * \f[y = \alpha A x + \beta y.\f] - */ -template -void spmv( - Alpha alpha, - const Matrix &A, - const Vector1 &x, - Beta beta, - Vector2 &y) -{ - AMGCL_TIC("spmv"); - spmv_impl::apply(alpha, A, x, beta, y); - AMGCL_TOC("spmv"); -} - -/// Computes residual error. -/** - * \f[r = rhs - Ax.\f] - */ -template -void residual(const Vector1 &rhs, const Matrix &A, const Vector2 &x, Vector3 &r) -{ - AMGCL_TIC("residual"); - residual_impl::apply(rhs, A, x, r); - AMGCL_TOC("residual"); -} - -/// Zeros out a vector. -template -void clear(Vector &x) -{ - AMGCL_TIC("clear"); - clear_impl::apply(x); - AMGCL_TOC("clear"); -} - -/// Vector copy. -template -void copy(const Vector1 &x, Vector2 &y) -{ - AMGCL_TIC("copy"); - copy_impl::apply(x, y); - AMGCL_TOC("copy"); -} - -/// Computes inner product of two vectors. -template -typename math::inner_product_impl< - typename value_type::type - >::return_type -inner_product(const Vector1 &x, const Vector2 &y) -{ - typedef typename math::inner_product_impl< - typename value_type::type - >::return_type result_type; - - AMGCL_TIC("inner_product"); - result_type p = inner_product_impl::get(x, y); - AMGCL_TOC("inner_product"); - - return p; -} - -/// Computes linear combination of two vectors. -/** - * \f[y = ax + by.\f] - */ -template -void axpby(A a, Vector1 const &x, B b, Vector2 &y) { - AMGCL_TIC("axpby"); - axpby_impl::apply(a, x, b, y); - AMGCL_TOC("axpby"); -} - -/// Computes linear combination of three vectors. -/** - * \f[z = ax + by + cz.\f] - */ -template -void axpbypcz(A a, Vector1 const &x, B b, Vector2 const &y, C c, Vector3 &z) { - AMGCL_TIC("axpbypcz"); - axpbypcz_impl::apply(a, x, b, y, c, z); - AMGCL_TOC("axpbypcz"); -} - -/// Computes element-wize vector product. -/** - * \f[z = \alpha xy + \beta z.\f] - */ -template -void vmul(Alpha alpha, const Vector1 &x, const Vector2 &y, Beta beta, Vector3 &z) -{ - AMGCL_TIC("vmul"); - vmul_impl::apply(alpha, x, y, beta, z); - AMGCL_TOC("vmul"); -} - -/// Reinterpret the vector to be compatible with the matrix value type -template -typename reinterpret_as_rhs_impl< - MatrixValue, - typename std::decay::type, - std::is_const::type>::value - >::return_type -reinterpret_as_rhs(Vector &&x) { - return reinterpret_as_rhs_impl< - MatrixValue, - typename std::decay::type, - std::is_const::type>::value - >::get(std::forward(x)); -} - -/// Is the relaxation supported by the backend? -template class Relaxation, class Enable = void> -struct relaxation_is_supported : std::true_type {}; - -/// Is the coarsening supported by the backend? -template class Coarsening, class Enable = void> -struct coarsening_is_supported : std::true_type {}; - -/// Linear combination of vectors -/** - * \f[ y = \sum_j c_j v_j + alpha * y \f] - */ -template -void lin_comb(size_t n, const Coefs &c, const Vecs &v, const Coef &alpha, Vec &y) { - axpby(c[0], *v[0], alpha, y); - size_t i = 1; - for(; i + 1 < n; i += 2) - axpbypcz(c[i], *v[i], c[i+1], *v[i+1], math::identity(), y); - - for(; i < n; ++i) - axpby(c[i], *v[i], math::identity(), y); -} - -} // namespace backend -} // namespace amgcl - - -#endif diff --git a/src/solvers/amgcl/backend/mkl.hpp b/src/solvers/amgcl/backend/mkl.hpp deleted file mode 100644 index c26d4fb..0000000 --- a/src/solvers/amgcl/backend/mkl.hpp +++ /dev/null @@ -1,294 +0,0 @@ -#ifndef AMGCL_BACKEND_MKL_HPP -#define AMGCL_BACKEND_MKL_HPP - -/* -The MIT License - -Copyright (c) 2012-2014 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/mkl.hpp - * \author Denis Demidov - * \brief Intel Math Kernel library backend. - */ - -#include -#include - -#include - -namespace amgcl { -namespace backend { - -/// Sparse matrix stored in CRS format. -struct mkl_crs : public crs { - typedef crs Base; - - template < - class PtrRange, - class ColRange, - class ValRange - > - mkl_crs(size_t nrows, size_t ncols, - const PtrRange &ptr_range, - const ColRange &col_range, - const ValRange &val_range - ) - : Base(nrows, ncols, ptr_range, col_range, val_range) - {} - - template - mkl_crs(const Matrix &A) : Base(A) {} -}; - -/// BLAS vector. -class mkl_vec { - public: - mkl_vec() {} - mkl_vec(size_t n) : buf(n) {} - - template - mkl_vec(Iterator beg, Iterator end) : buf(beg, end) {} - - size_t size() const { - return buf.size(); - } - - double operator[](size_t i) const { - return buf[i]; - } - - double& operator[](size_t i) { - return buf[i]; - } - - const double* data() const { - return buf.data(); - } - - double* data() { - return buf.data(); - } - private: - std::vector buf; -}; - -/// Intel Math Kernel library backend. -struct mkl { - typedef double value_type; - typedef int index_type; - typedef int col_type; - typedef int ptr_type; - - typedef mkl_crs matrix; - typedef mkl_vec vector; - typedef vector matrix_diagonal; - typedef solver::skyline_lu direct_solver; - - /// Backend parameters. - struct params { - params() {} - params(const boost::property_tree::ptree&) {} - }; - - /// Copy matrix from builtin backend. - static std::shared_ptr - copy_matrix( - std::shared_ptr< typename builtin::matrix > A, - const params& - ) - { - return std::make_shared(*A); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(typename builtin::vector const &x, const params&) - { - return std::make_shared(x.data(), x.data() + x.size()); - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector( - std::shared_ptr< typename builtin::vector > x, - const params &prm - ) - { - return copy_vector(*x, prm); - } - - /// Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params&) - { - return std::make_shared(size); - } - - /// Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr< typename builtin::matrix > A, const params&) - { - return std::make_shared(*A); - } -}; - -//--------------------------------------------------------------------------- -// Backend interface implementation -//--------------------------------------------------------------------------- -template <> -struct value_type < mkl_crs > { - typedef double type; -}; - -template <> -struct value_type < mkl_vec > { - typedef double type; -}; - -template <> -struct rows_impl< mkl_crs > { - static size_t get(const mkl_crs &A) { - return A.nrows; - } -}; - -template <> -struct cols_impl< mkl_crs > { - static size_t get(const mkl_crs &A) { - return A.ncols; - } -}; - -template <> -struct nonzeros_impl< mkl_crs > { - static size_t get(const mkl_crs &A) { - return A.ptr[A.nrows]; - } -}; - -template -struct spmv_impl< - Alpha, mkl_crs, mkl_vec, - Beta, mkl_vec - > -{ - static void apply(Alpha alpha, const mkl_crs &A, const mkl_vec &x, - Beta beta, mkl_vec &y) - { - MKL_INT m = A.nrows; - MKL_INT k = A.ncols; - mkl_dcsrmv("N", &m, &k, &alpha, "G__C", - const_cast(&A.val[0]), - const_cast(&A.col[0]), - const_cast(&A.ptr[0]), - const_cast(&A.ptr[1]), - const_cast(x.data()), - &beta, - y.data()); - } -}; - -template <> -struct residual_impl< mkl_crs, mkl_vec, mkl_vec, mkl_vec > -{ - static void apply(const mkl_vec &rhs, const mkl_crs &A, const mkl_vec &x, - mkl_vec &r) - { - cblas_dcopy(rhs.size(), rhs.data(), 1, r.data(), 1); - spmv_impl::apply(-1, A, x, 1, r); - } -}; - -template <> -struct clear_impl< mkl_vec > -{ - static void apply(mkl_vec &x) - { - std::fill_n(x.data(), x.size(), 0.0); - } -}; - -template <> -struct copy_impl< mkl_vec, mkl_vec > -{ - static void apply(const mkl_vec &x, mkl_vec &y) - { - cblas_dcopy(x.size(), x.data(), 1, y.data(), 1); - } -}; - -template <> -struct inner_product_impl< mkl_vec, mkl_vec > -{ - static double get(const mkl_vec &x, const mkl_vec &y) - { - return cblas_ddot(x.size(), x.data(), 1, y.data(), 1); - } -}; - -template -struct axpby_impl< - A, mkl_vec, - B, mkl_vec - > -{ - static void apply(A a, const mkl_vec &x, B b, mkl_vec &y) - { - cblas_dscal(y.size(), b, y.data(), 1); - cblas_daxpy(y.size(), a, x.data(), 1, y.data(), 1); - } -}; - -template -struct axpbypcz_impl< - A, mkl_vec, - B, mkl_vec, - C, mkl_vec - > -{ - static void apply( - A a, const mkl_vec &x, - B b, const mkl_vec &y, - C c, mkl_vec &z - ) - { - cblas_dscal(z.size(), c, z.data(), 1); - cblas_daxpy(z.size(), a, x.data(), 1, z.data(), 1); - cblas_daxpy(z.size(), b, y.data(), 1, z.data(), 1); - } -}; - -template -struct vmul_impl< - A, mkl_vec, mkl_vec, - B, mkl_vec > -{ - static void apply(A a, const mkl_vec &x, const mkl_vec &y, B b, mkl_vec &z) - { - cblas_dsbmv(CblasRowMajor, CblasLower, z.size(), 0, a, x.data(), 1, y.data(), 1, b, z.data(), 1); - } -}; -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/vexcl.hpp b/src/solvers/amgcl/backend/vexcl.hpp deleted file mode 100644 index 4a1dcc2..0000000 --- a/src/solvers/amgcl/backend/vexcl.hpp +++ /dev/null @@ -1,493 +0,0 @@ -#ifndef AMGCL_BACKEND_VEXCL_HPP -#define AMGCL_BACKEND_VEXCL_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/vexcl.hpp - * \author Denis Demidov - * \brief VexCL backend. - */ - -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -namespace amgcl { - -namespace solver { - -/** Wrapper around solver::skyline_lu for use with the VexCL backend. - * Copies the rhs to the host memory, solves the problem using the host CPU, - * then copies the solution back to the compute device(s). - */ -template -struct vexcl_skyline_lu : solver::skyline_lu { - typedef solver::skyline_lu Base; - typedef typename math::rhs_of::type rhs_type; - - mutable std::vector _rhs, _x; - - template - vexcl_skyline_lu(const Matrix &A, const Params&) - : Base(*A), _rhs(backend::rows(*A)), _x(backend::rows(*A)) - { } - - template - void operator()(const Vec1 &rhs, Vec2 &x) const { - vex::copy(rhs, _rhs); - static_cast(this)->operator()(_rhs, _x); - vex::copy(_x, x); - } - - size_t bytes() const { - return - backend::bytes(*static_cast(this)) + - backend::bytes(_rhs) + - backend::bytes(_x); - } -}; - -} - -namespace backend { - -/// The VexCL backend parameters. -struct vexcl_params { - - std::vector< vex::backend::command_queue > q; ///< Command queues that identify compute devices to use with VexCL. - - /// Do CSR to ELL conversion on the GPU side. - /** This will result in faster setup, but will require more GPU memory. */ - bool fast_matrix_setup; - - vexcl_params() : fast_matrix_setup(true) {} - -#ifndef AMGCL_NO_BOOST - vexcl_params(const boost::property_tree::ptree &p) - : fast_matrix_setup(p.get("fast_matrix_setup", vexcl_params().fast_matrix_setup)) - { - std::vector *ptr = 0; - ptr = p.get("q", ptr); - if (ptr) q = *ptr; - check_params(p, {"q", "fast_matrix_setup"}); - } - - void get(boost::property_tree::ptree &p, const std::string &path) const { - p.put(path + "q", &q); - p.put(path + "fast_matrix_setup", fast_matrix_setup); - } -#endif - - const std::vector& context() const { - if (q.empty()) - return vex::current_context().queue(); - else - return q; - } -}; - - -/** - * The backend uses the VexCL - * library for accelerating solution on the modern GPUs and multicore - * processors with the help of OpenCL or CUDA technologies. - * The VexCL backend stores the system matrix as ``vex::SpMat`` and - * expects the right hand side and the solution vectors to be instances of the - * ``vex::vector`` type. - */ -template > -struct vexcl { - typedef real value_type; - typedef ptrdiff_t index_type; - typedef ColumnType col_type; - typedef PointerType ptr_type; - - typedef vex::sparse::distributed< - vex::sparse::matrix - > matrix; - typedef typename math::rhs_of::type rhs_type; - typedef vex::vector vector; - typedef vex::vector matrix_diagonal; - typedef DirectSolver direct_solver; - - struct provides_row_iterator : std::false_type {}; - - typedef vexcl_params params; - - static std::string name() { return "vexcl"; } - - // Copy matrix from builtin backend. - static std::shared_ptr - copy_matrix(std::shared_ptr< typename builtin::matrix > A, const params &prm) - { - precondition(!prm.context().empty(), "Empty VexCL context!"); - - const typename builtin::matrix &a = *A; - - const size_t n = rows(*A); - const size_t m = cols(*A); - const size_t nnz = a.ptr[n]; - - return std::make_shared(prm.context(), n, m, - boost::make_iterator_range(a.ptr, a.ptr + n+1), - boost::make_iterator_range(a.col, a.col + nnz), - boost::make_iterator_range(a.val, a.val + nnz), - prm.fast_matrix_setup - ); - } - - // Copy vector from builtin backend. - template - static std::shared_ptr< vex::vector > - copy_vector(const std::vector &x, const params &prm) - { - precondition(!prm.context().empty(), "Empty VexCL context!"); - return std::make_shared< vex::vector >(prm.context(), x); - } - - template - static std::shared_ptr< vex::vector > - copy_vector(const numa_vector &x, const params &prm) - { - precondition(!prm.context().empty(), "Empty VexCL context!"); - return std::make_shared< vex::vector >(prm.context(), x.size(), x.data()); - } - - // Copy vector from builtin backend. - template - static std::shared_ptr< vex::vector > - copy_vector(std::shared_ptr< numa_vector > x, const params &prm) - { - return copy_vector(*x, prm); - } - - // Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params &prm) - { - precondition(!prm.context().empty(), "Empty VexCL context!"); - - return std::make_shared(prm.context(), size); - } - - struct gather { - size_t n; - mutable vex::gather G; - mutable std::vector buf; - - gather(size_t src_size, const std::vector &I, const params &prm) - : n(I.size()), G(prm.context(), src_size, std::vector(I.begin(), I.end())) - { } - - template - void operator()(const vex::vector &src, vex::vector &dst) const { - if (buf.size() < sizeof(D) * n) buf.resize(sizeof(D) * n); - auto t = reinterpret_cast(buf.data()); - G(src, t); - vex::copy(t, t + n, dst.begin()); - } - - template - void operator()(const vex::vector &vec, std::vector &vals) const { - G(vec, vals); - } - }; - - struct scatter { - size_t n; - mutable vex::scatter S; - mutable std::vector buf; - - scatter(size_t size, const std::vector &I, const params &prm) - : n(I.size()), S(prm.context(), size, std::vector(I.begin(), I.end())) - { } - - template - void operator()(const vex::vector &src, vex::vector &dst) const { - if (buf.size() < sizeof(D) * n) buf.resize(sizeof(D) * n); - auto t = reinterpret_cast(buf.data()); - vex::copy(src.begin(), src.end(), t); - S(t, dst); - } - }; - - - // Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr< typename builtin::matrix > A, const params &prm) - { - return std::make_shared(A, prm); - } -}; - -// Hybrid backend uses scalar matrices to build the hierarchy, -// but stores the computed matrices in the block format. -template < - typename BlockType, - typename ColumnType = ptrdiff_t, - typename PointerType = ColumnType, - class DirectSolver = solver::vexcl_skyline_lu::type> - > -struct vexcl_hybrid : public vexcl::type, ColumnType, PointerType, DirectSolver> -{ - typedef typename math::scalar_of::type ScalarType; - typedef vexcl Base; - typedef vex::sparse::distributed< - vex::sparse::matrix< - BlockType, - typename Base::col_type, - typename Base::ptr_type - > - > matrix; - - static std::shared_ptr - copy_matrix(std::shared_ptr< typename builtin::matrix > As, const typename Base::params &prm) - { - precondition(!prm.context().empty(), "Empty VexCL context!"); - - typename builtin::matrix A(amgcl::adapter::block_matrix(*As)); - - const size_t n = rows(A); - const size_t m = cols(A); - const size_t nnz = A.ptr[n]; - - return std::make_shared(prm.context(), n, m, - boost::make_iterator_range(A.ptr, A.ptr + n+1), - boost::make_iterator_range(A.col, A.col + nnz), - boost::make_iterator_range(A.val, A.val + nnz), - prm.fast_matrix_setup - ); - } -}; - -//--------------------------------------------------------------------------- -// Backend interface implementation -//--------------------------------------------------------------------------- -template -struct backends_compatible< vexcl, vexcl > : std::true_type {}; - -template -struct backends_compatible< vexcl_hybrid, vexcl_hybrid > : std::true_type {}; - -template -struct backends_compatible< vexcl, vexcl_hybrid > : std::true_type {}; - -template -struct backends_compatible< vexcl_hybrid, vexcl > : std::true_type {}; - -template < typename V, typename C, typename P > -struct bytes_impl< vex::sparse::distributed > > { - static size_t get(const vex::sparse::distributed > &A) { - return - sizeof(P) * (A.rows() + 1) + - sizeof(C) * A.nonzeros() + - sizeof(V) * A.nonzeros(); - } -}; - -template < typename V > -struct bytes_impl< vex::vector > { - static size_t get(const vex::vector &v) { - return v.size() * sizeof(V); - } -}; - -template < typename Alpha, typename Beta, typename Va, typename Vx, typename Vy, typename C, typename P > -struct spmv_impl< - Alpha, vex::sparse::distributed>, vex::vector, - Beta, vex::vector, - typename std::enable_if< - math::static_rows::value == 1 && - math::static_rows::value == 1 && - math::static_rows::value == 1 - >::type - > -{ - typedef vex::sparse::distributed> matrix; - - static void apply(Alpha alpha, const matrix &A, const vex::vector &x, - Beta beta, vex::vector &y) - { - if (beta) - y = alpha * (A * x) + beta * y; - else - y = alpha * (A * x); - } -}; - -template < typename Va, typename Vf, typename Vx, typename Vr, typename C, typename P > -struct residual_impl< - vex::sparse::distributed>, - vex::vector, - vex::vector, - vex::vector, - typename std::enable_if< - !is_static_matrix::value && - !is_static_matrix::value && - !is_static_matrix::value && - !is_static_matrix::value - >::type - > -{ - typedef vex::sparse::distributed> matrix; - - static void apply(const vex::vector &rhs, const matrix &A, const vex::vector &x, - vex::vector &r) - { - r = rhs - A * x; - } -}; - -template < typename V > -struct clear_impl< vex::vector > -{ - static void apply(vex::vector &x) - { - x = 0; - } -}; - -template < class V, class T > -struct copy_impl > -{ - static void apply(const V &x, vex::vector &y) - { - vex::copy(x, y); - } -}; - -template < class T, class V > -struct copy_impl, V> -{ - static void apply(const vex::vector &x, V &y) - { - vex::copy(x, y); - } -}; - -template < class T1, class T2 > -struct copy_impl, vex::vector> -{ - static void apply(const vex::vector &x, vex::vector &y) - { - vex::copy(x, y); - } -}; - -template < typename V > -struct inner_product_impl< - vex::vector, - vex::vector - > -{ - static V get(const vex::vector &x, const vex::vector &y) - { - vex::Reductor sum( x.queue_list() ); - return sum(x * y); - } -}; - -template < typename A, typename B, typename V1, typename V2 > -struct axpby_impl< - A, vex::vector, - B, vex::vector - > { - static void apply(A a, const vex::vector &x, B b, vex::vector &y) - { - if (b) - y = a * x + b * y; - else - y = a * x; - } -}; - -template < typename A, typename B, typename C, typename V1, typename V2, typename V3 > -struct axpbypcz_impl< - A, vex::vector, - B, vex::vector, - C, vex::vector - > -{ - static void apply( - A a, const vex::vector &x, - B b, const vex::vector &y, - C c, vex::vector &z - ) - { - if (c) - z = a * x + b * y + c * z; - else - z = a * x + b * y; - } -}; - -template < typename A, typename B, typename Vx, typename Vy, typename Vz > -struct vmul_impl< - A, vex::vector, vex::vector, - B, vex::vector - > -{ - static void apply(A a, const vex::vector &x, const vex::vector &y, - B b, vex::vector &z) - { - if (b) - z = a * x * y + b * z; - else - z = a * x * y; - } -}; - -template -struct reinterpret_as_rhs_impl, IsConst> -{ - typedef typename math::scalar_of::type scalar_type; - typedef typename math::rhs_of::type rhs_type; - typedef typename math::replace_scalar::type dst_type; - typedef vex::vector return_type; - - static return_type get(const vex::vector &x) { - return x.template reinterpret(); - } -}; - - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/vexcl_static_matrix.hpp b/src/solvers/amgcl/backend/vexcl_static_matrix.hpp deleted file mode 100644 index ad2fde6..0000000 --- a/src/solvers/amgcl/backend/vexcl_static_matrix.hpp +++ /dev/null @@ -1,1031 +0,0 @@ -#ifndef AMGCL_BACKEND_VEXCL_STATIC_MATRIX_HPP -#define AMGCL_BACKEND_VEXCL_STATIC_MATRIX_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/vexcl_static_matrix.hpp - * \author Denis Demidov - * \brief Static matrix support for the VexCL backend. - */ - -#include -#include -#include -#include - -namespace vex { - -template -struct is_cl_native< amgcl::static_matrix > : std::true_type {}; - -template -struct type_name_impl< amgcl::static_matrix > -{ - static std::string get() { - std::ostringstream s; - s << "amgcl_matrix_" << type_name() << "_" << N << "x" << M; - return s.str(); - } -}; - -template -struct cl_scalar_of< amgcl::static_matrix > { - typedef T type; -}; - -namespace sparse { - -template -struct rhs_of< amgcl::static_matrix > { - typedef amgcl::static_matrix type; -}; - -template -struct spmv_ops_impl, amgcl::static_matrix> { - typedef amgcl::static_matrix matrix_value; - typedef amgcl::static_matrix vector_value; - - static void decl_accum_var(backend::source_generator &src, const std::string &name) - { - src.new_line() << type_name>() << " " << name << ";"; - for(int i = 0; i < N; ++i) { - src.new_line() << name << ".data[" << i << "][0] = 0;"; - } - } - - static void append(backend::source_generator &src, - const std::string &sum, const std::string &val) - { - for(int i = 0; i < N; ++i) - src.new_line() << sum << ".data[" << i << "][0] += " << val << ".data[" << i << "][0];"; - } - - static void append_product(backend::source_generator &src, - const std::string &sum, const std::string &mat_val, const std::string &vec_val) - { - src.open("{"); - src.new_line() << type_name() << " v = " << vec_val << ";"; - for(int i = 0; i < N; ++i) { - src.new_line() << sum << ".data[" << i << "][0] += "; - for(int j = 0; j < N; ++j) { - if (j) src << " + "; - src << mat_val << ".data[" << i << "][" << j << "] * v.data[" << j << "][0]"; - } - src << ";"; - } - src.close("}"); - } -}; - -template -class ell, Col, Ptr> { - public: - typedef amgcl::static_matrix Val; - typedef Val value_type; - - typedef Val val_type; - typedef Col col_type; - typedef Ptr ptr_type; - - template - ell( - const std::vector &q, - size_t nrows, size_t ncols, - const PtrRange &ptr, - const ColRange &col, - const ValRange &val, - bool fast = true - ) : q(q[0]), n(nrows), m(ncols), - nnz(std::distance(std::begin(val), std::end(val))), - ell_pitch(alignup(nrows, 16U)), csr_nnz(0) - { - precondition(q.size() == 1, - "sparse::ell is only supported for single-device contexts"); - - if (fast) { - convert(ptr, col, val); - return; - } - - /* 1. Get optimal ELL widths for local and remote parts. */ - // Speed of ELL relative to CSR: - const double ell_vs_csr = 3.0; - - // Find maximum widths for local and remote parts: - size_t max_width = 0; - for(size_t i = 0; i < n; ++i) - max_width = std::max(max_width, ptr[i+1] - ptr[i]); - - // Build width distribution histogram. - std::vector hist(max_width + 1, 0); - for(size_t i = 0; i < n; ++i) - ++hist[ptr[i+1] - ptr[i]]; - - // Estimate optimal width for ELL part of the matrix. - ell_width = max_width; - for(size_t i = 0, rows = n; i < max_width; ++i) { - rows -= hist[i]; // Number of rows wider than i. - if (ell_vs_csr * rows < n) { - ell_width = i; - break; - } - } - - if (ell_width == 0) { - assert(csr_nnz == nnz); - - csr_ptr = backend::device_vector(q[0], n + 1, &ptr[0]); - csr_col = backend::device_vector(q[0], csr_nnz, &col[0]); - csr_val = create_device_vector (q[0], csr_nnz, &val[0], false); - - return; - } - - size_t ell_nnz = ell_pitch * ell_width; - - // Count nonzeros in CSR part of the matrix. - for(size_t i = ell_width + 1; i <= max_width; ++i) - csr_nnz += hist[i] * (i - ell_width); - - /* 3. Split the input matrix into ELL and CSR submatrices. */ - std::vector _ell_col(ell_nnz, static_cast(-1)); - std::vector _ell_val(ell_nnz * N * N); - std::vector _csr_ptr; - std::vector _csr_col; - std::vector _csr_val; - - if (csr_nnz) { - _csr_ptr.resize(n + 1); - _csr_col.resize(csr_nnz); - _csr_val.resize(csr_nnz * N * N); - - _csr_ptr[0] = 0; - for(size_t i = 0; i < n; ++i) { - size_t w = ptr[i+1] - ptr[i]; - _csr_ptr[i+1] = _csr_ptr[i] + (w > ell_width ? w - ell_width : 0); - } - } - - - for(size_t i = 0; i < n; ++i) { - size_t w = 0; - Ptr csr_head = csr_nnz ? _csr_ptr[i] : 0; - for(Ptr j = ptr[i], e = ptr[i+1]; j < e; ++j, ++w) { - Col c = col[j]; - Val v = val[j]; - - if (w < ell_width) { - _ell_col[i + w * ell_pitch] = c; - for(int k = 0, ii = 0; ii < N; ++ii) - for(int jj = 0; jj < N; ++jj, ++k) - _ell_val[k * ell_nnz + w * ell_pitch + i] = v(ii,jj); - } else { - _csr_col[csr_head] = c; - for(int k = 0, ii = 0; ii < N; ++ii) - for(int jj = 0; jj < N; ++jj, ++k) - _csr_val[k * csr_nnz + csr_head] = v(ii,jj); - ++csr_head; - } - } - } - - { - size_t ell_size = ell_pitch * ell_width; - ell_col = backend::device_vector(q[0], ell_size, _ell_col.data()); - ell_val = backend::device_vector (q[0], ell_nnz * N * N, _ell_val.data()); - } - - if (csr_nnz) { - csr_ptr = backend::device_vector(q[0], n + 1, _csr_ptr.data()); - csr_col = backend::device_vector(q[0], csr_nnz, _csr_col.data()); - csr_val = backend::device_vector (q[0], csr_nnz * N * N, _csr_val.data()); - } - } - - // Dummy matrix; used internally to pass empty parameters to kernels. - ell(const backend::command_queue &q) - : q(q), n(0), m(0), nnz(0), ell_width(0), ell_pitch(0), csr_nnz(0) - {} - - template - friend - typename std::enable_if< - boost::proto::matches< - typename boost::proto::result_of::as_expr::type, - vector_expr_grammar - >::value, - matrix_vector_product - >::type - operator*(const ell &A, const Expr &x) { - return matrix_vector_product(A, x); - } - - template - static void terminal_preamble(const Vector &x, backend::source_generator &src, - const backend::command_queue &q, const std::string &prm_name, - detail::kernel_generator_state_ptr state) - { - detail::output_terminal_preamble tp(src, q, prm_name + "_x", state); - boost::proto::eval(boost::proto::as_child(x), tp); - } - - template - static void local_terminal_init(const Vector &x, backend::source_generator &src, - const backend::command_queue &q, const std::string &prm_name, - detail::kernel_generator_state_ptr state) - { - typedef typename detail::return_type::type x_type; - typedef amgcl::static_matrix vector_value; - - src.new_line() << type_name() << " " << prm_name << "_sum;"; - for(int i = 0; i < N; ++i) { - src.new_line() << prm_name << "_sum.data[" << i << "][0] = 0;"; - } - src.open("{"); - - // ELL part - src.new_line() << type_name() << " " << prm_name << "_ell_size = " << prm_name << "_ell_width * " << prm_name << "_ell_pitch;"; - src.new_line() << "for(size_t j = 0; j < " << prm_name << "_ell_width; ++j)"; - src.open("{"); - src.new_line() << type_name() << " nnz_idx = idx + j * " << prm_name << "_ell_pitch;"; - src.new_line() << type_name() << " c = " << prm_name << "_ell_col[nnz_idx];"; - src.new_line() << "if (c != (" << type_name() << ")(-1))"; - src.open("{"); - - src.new_line() << type_name() << " idx = c;"; - - { - detail::output_local_preamble init_x(src, q, prm_name + "_x", state); - boost::proto::eval(boost::proto::as_child(x), init_x); - - backend::source_generator vec_value; - detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state); - boost::proto::eval(boost::proto::as_child(x), expr_x); - - src.open("{"); - src.new_line() << type_name() << " v = " << vec_value.str() << ";"; - for(int k = 0, j = 0; j < N; ++j) { - src.new_line() << prm_name << "_sum.data[" << j << "][0] += "; - for(int i = 0; i < N; ++i, ++k) { - if (i) src << " + "; - src << prm_name << "_ell_val[" << k << " * " << prm_name << "_ell_size + nnz_idx] * v.data[" << i << "][0]"; - } - src << ";"; - } - src.close("}"); - } - - src.close("} else break;"); - src.close("}"); - - // CSR part - src.new_line() << "if (" << prm_name << "_csr_ptr)"; - src.open("{"); - src.new_line() << type_name() << " " << prm_name << "_csr_size = " << prm_name << "_csr_ptr[n];"; - src.new_line() << type_name() << " csr_beg = " << prm_name << "_csr_ptr[idx];"; - src.new_line() << type_name() << " csr_end = " << prm_name << "_csr_ptr[idx+1];"; - src.new_line() << "for(" << type_name() << " j = csr_beg; j < csr_end; ++j)"; - src.open("{"); - - src.new_line() << type_name() << " idx = " << prm_name << "_csr_col[j];"; - - { - detail::output_local_preamble init_x(src, q, prm_name + "_x", state); - boost::proto::eval(boost::proto::as_child(x), init_x); - - backend::source_generator vec_value; - detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state); - boost::proto::eval(boost::proto::as_child(x), expr_x); - - src.open("{"); - src.new_line() << type_name() << " v = " << vec_value.str() << ";"; - for(int k = 0, j = 0; j < N; ++j) { - src.new_line() << prm_name << "_sum.data[" << j << "][0] += "; - for(int i = 0; i < N; ++i, ++k) { - if (i) src << " + "; - src << prm_name << "_csr_val[" << k << " * " << prm_name << "_csr_size + j] * v.data[" << i << "][0]"; - } - src << ";"; - } - src.close("}"); - } - - src.close("}"); - src.close("}"); - src.close("}"); - } - - template - static void kernel_param_declaration(const Vector &x, backend::source_generator &src, - const backend::command_queue &q, const std::string &prm_name, - detail::kernel_generator_state_ptr state) - { - src.parameter< size_t >(prm_name + "_ell_width"); - src.parameter< size_t >(prm_name + "_ell_pitch"); - - src.parameter< global_ptr >(prm_name + "_ell_col"); - src.parameter< global_ptr >(prm_name + "_ell_val"); - src.parameter< global_ptr >(prm_name + "_csr_ptr"); - src.parameter< global_ptr >(prm_name + "_csr_col"); - src.parameter< global_ptr >(prm_name + "_csr_val"); - - detail::declare_expression_parameter decl_x(src, q, prm_name + "_x", state); - detail::extract_terminals()(boost::proto::as_child(x), decl_x); - } - - template - static void partial_vector_expr(const Vector&, backend::source_generator &src, - const backend::command_queue&, const std::string &prm_name, - detail::kernel_generator_state_ptr) - { - src << prm_name << "_sum"; - } - - template - void kernel_arg_setter(const Vector &x, - backend::kernel &kernel, unsigned part, size_t index_offset, - detail::kernel_generator_state_ptr state) const - { - kernel.push_arg(ell_width); - kernel.push_arg(ell_pitch); - if (ell_width) { - kernel.push_arg(ell_col); - kernel.push_arg(ell_val); - } else { - kernel.push_arg(static_cast(0)); - kernel.push_arg(static_cast(0)); - } - if (csr_nnz) { - kernel.push_arg(csr_ptr); - kernel.push_arg(csr_col); - kernel.push_arg(csr_val); - } else { - kernel.push_arg(static_cast(0)); - kernel.push_arg(static_cast(0)); - kernel.push_arg(static_cast(0)); - } - - detail::set_expression_argument x_args(kernel, part, index_offset, state); - detail::extract_terminals()( boost::proto::as_child(x), x_args); - } - - template - void expression_properties(const Vector&, - std::vector &queue_list, - std::vector &partition, - size_t &size) const - { - queue_list = std::vector(1, q); - partition = std::vector(2, 0); - partition.back() = size = n; - } - - size_t rows() const { return n; } - size_t cols() const { return m; } - size_t nonzeros() const { return nnz; } - private: - backend::command_queue q; - - size_t n, m, nnz, ell_width, ell_pitch, csr_nnz; - - backend::device_vector ell_col; - backend::device_vector ell_val; - - backend::device_vector csr_ptr; - backend::device_vector csr_col; - backend::device_vector csr_val; - - backend::device_vector create_device_vector(const backend::command_queue &q, - size_t nnz, const Val *host_data, bool fast = true) - { - backend::device_vector val(q, nnz * N * N); - - if (nnz) { - if (fast) { - backend::device_vector tmp(q, nnz * N * N, reinterpret_cast(host_data)); - - VEX_FUNCTION(T, transpose, (int,k)(int,m)(int,nnz)(T*, v), - int i = k / nnz; - int j = k % nnz; - return v[j * m + i]; - ); - - vex::vector(q,val) = transpose(vex::element_index(), N*N, nnz, raw_pointer(vex::vector(q, tmp))); - } else { - auto v = val.map(q); - - for(int k = 0, i = 0; i < N; ++i) - for(int j = 0; j < N; ++j, ++k) - for(size_t m = 0; m < nnz; ++m) - v[k * nnz + m] = host_data[m](i,j); - } - } - - return val; - } - - backend::kernel& csr2ell_kernel() const { - using namespace vex::detail; - static kernel_cache cache; - - auto kernel = cache.find(q); - if (kernel == cache.end()) { - backend::source_generator src(q); - - src.begin_kernel("convert_csr2ell"); - src.begin_kernel_parameters(); - src.template parameter("n"); - src.template parameter("ell_width"); - src.template parameter("ell_pitch"); - src.template parameter< global_ptr >("ptr"); - src.template parameter< global_ptr >("col"); - src.template parameter< global_ptr >("val"); - src.template parameter< global_ptr >("ell_col"); - src.template parameter< global_ptr >("ell_val"); - src.template parameter< global_ptr >("csr_ptr"); - src.template parameter< global_ptr >("csr_col"); - src.template parameter< global_ptr >("csr_val"); - src.end_kernel_parameters(); - src.new_line() << type_name() << " nnz = ptr[n];"; - src.new_line() << type_name() << " ell_nnz = ell_width * ell_pitch;"; - src.new_line() << type_name() << " csr_nnz = csr_ptr ? csr_ptr[n] : 0;"; - src.grid_stride_loop().open("{"); - - src.new_line() << type_name() << " w = 0;"; - src.new_line() << type_name() << " csr_head = 0;"; - src.new_line() << "if (csr_ptr) csr_head = csr_ptr[idx];"; - src.new_line() << "for(" << type_name() << " j = ptr[idx], e = ptr[idx+1]; j < e; ++j, ++w)"; - src.open("{"); - src.new_line() << type_name() << " c = col[j];"; - src.new_line() << "if (w < ell_width) {"; - src.new_line() << " ell_col[idx + w * ell_pitch] = c;"; - for(int i = 0; i < N * N; ++i) - src.new_line() << " ell_val[" << i << " * ell_nnz + w * ell_pitch + idx] = val[" << i << " * nnz + j];"; - src.new_line() << "} else {"; - src.new_line() << " csr_col[csr_head] = c;"; - for(int i = 0; i < N * N; ++i) - src.new_line() << " csr_val[" << i << " * csr_nnz + csr_head] = val[" << i << " * nnz + j];"; - src.new_line() << " ++csr_head;"; - src.new_line() << "}"; - src.close("}"); - src.close("}"); - src.end_kernel(); - - kernel = cache.insert(q, backend::kernel(q, src.str(), "convert_csr2ell")); - } - - return kernel->second; - } - - template - void convert( - const PtrRange &host_ptr, - const ColRange &host_col, - const ValRange &host_val - ) - { - size_t nnz = host_ptr[n]; - - backend::device_vector Aptr(q, n + 1, &host_ptr[0]); - backend::device_vector Acol(q, nnz, nnz ? &host_col[0] : nullptr); - backend::device_vector Aval = create_device_vector(q, nnz, nnz ? &host_val[0] : nullptr); - - /* 1. Get optimal ELL widths for local and remote parts. */ - // Speed of ELL relative to CSR: - const double ell_vs_csr = 3.0; - - // Find maximum widths for local and remote parts: - std::vector ctx(1, q); - Reductor max(ctx); - - vex::vector ptr(q, Aptr); - - VEX_FUNCTION(Ptr, row_width, (size_t, i)(const Ptr*, ptr), - return ptr[i+1] - ptr[i]; - ); - - int max_width = max(row_width(element_index(0, n), raw_pointer(ptr))); - - // Build width distribution histogram. - vex::vector hist(ctx, max_width + 1); - hist = 0; - eval(atomic_add(&permutation(row_width(element_index(0, n), raw_pointer(ptr)))(hist), 1)); - - // Estimate optimal width for ELL part of the matrix, - // count nonzeros in CSR part of the matrix - ell_width = max_width; - { - auto h = hist.map(0); - - for(int i = 0, rows = n; i < max_width; ++i) { - rows -= h[i]; // Number of rows wider than i. - if (ell_vs_csr * rows < n) { - ell_width = i; - break; - } - } - - for(int i = ell_width + 1; i <= max_width; ++i) - csr_nnz += h[i] * (i - ell_width); - } - - if (ell_width == 0) { - assert(csr_nnz == nnz); - - csr_ptr = Aptr; - csr_col = Acol; - csr_val = Aval; - - return; - } - - if (csr_nnz) { - VEX_FUNCTION(int, csr_width, (size_t, ell_width)(size_t, i)(const Ptr*, ptr), - if (i == 0) return 0; - int w = ptr[i] - ptr[i-1]; - return (w > ell_width) ? (w - ell_width) : 0; - ); - - vex::vector csr_w(ctx, n+1); - - csr_ptr = backend::device_vector(q, n + 1); - csr_col = backend::device_vector(q, csr_nnz); - csr_val = backend::device_vector (q, csr_nnz * N * N); - - csr_w = csr_width(ell_width, element_index(), raw_pointer(ptr)); - vector csr_p(q, csr_ptr); - inclusive_scan(csr_w, csr_p); - } - - - /* 3. Split the input matrix into ELL and CSR submatrices. */ - ell_col = backend::device_vector(q, ell_pitch * ell_width); - ell_val = backend::device_vector (q, ell_pitch * ell_width * N * N); - - vex::vector(q, ell_col) = -1; - - auto &convert = csr2ell_kernel(); - - convert.push_arg(n); - convert.push_arg(ell_width); - convert.push_arg(ell_pitch); - convert.push_arg(Aptr); - convert.push_arg(Acol); - convert.push_arg(Aval); - convert.push_arg(ell_col); - convert.push_arg(ell_val); - if (csr_nnz) { - convert.push_arg(csr_ptr); - convert.push_arg(csr_col); - convert.push_arg(csr_val); - } else { - convert.push_arg(static_cast(0)); - convert.push_arg(static_cast(0)); - convert.push_arg(static_cast(0)); - } - convert(q); - } -}; - -} // namespace sparse -} // namespace vex - -namespace amgcl { -namespace backend { - -template -std::string vexcl_static_matrix_declaration() { - std::ostringstream s; - s << "typedef struct { " << vex::type_name() << " data[" << N << "][" << N << "]; } " - "amgcl_matrix_" << vex::type_name() << "_" << N << "x" << N << ";\n"; - if (N != 1) - s << "typedef struct { " << vex::type_name() << " data[" << N << "][" << 1 << "]; } " - "amgcl_matrix_" << vex::type_name() << "_" << N << "x" << 1 << ";\n"; - return s.str(); -} - -template -struct vex_scale { - typedef static_matrix vector; - - struct apply_type : vex::UserFunction { - apply_type() {} - - static std::string name() { - return "scale_" + vex::type_name(); - } - - static void define(vex::backend::source_generator &src, const std::string &fname = name()) { - src.begin_function(fname); - src.begin_function_parameters(); - src.parameter("a"); - src.parameter("m"); - src.end_function_parameters(); - for(int i = 0; i < N; ++i) - src.new_line() << "m.data[" << i << "][0] *= a;"; - src.new_line() << "return m;"; - src.end_function(); - } - } const apply; -}; - -template struct vex_convert; - -template -struct vex_convert::value>::type> -{ - typedef static_matrix src_vector; - typedef static_matrix dst_vector; - - struct apply_type : vex::UserFunction { - apply_type() {} - - static std::string name() { - return "convert_" + vex::type_name() + "_" + vex::type_name(); - } - - static void define(vex::backend::source_generator &src, const std::string &fname = name()) { - src.begin_function(fname); - src.begin_function_parameters(); - src.parameter("s"); - src.end_function_parameters(); - src.new_line() << vex::type_name() << " d;"; - for(int i = 0; i < N; ++i) - src.new_line() << "d.data[" << i << "][0] = s.data[" << i << "][0];"; - src.new_line() << "return d;"; - src.end_function(); - } - } const apply; -}; - -template -struct vex_convert::value>::type> -{ - template - static const X& apply(const X &x) { return x; } -}; - -template -struct vex_add { - typedef static_matrix vectorA; - typedef static_matrix vectorB; - - struct apply_type : vex::UserFunction { - apply_type() {} - - static std::string name() { - return "add_" + vex::type_name() + "_" + vex::type_name(); - } - - static void define(vex::backend::source_generator &src, const std::string &fname = name()) { - src.begin_function(fname); - src.begin_function_parameters(); - src.parameter("a"); - src.parameter("b"); - src.end_function_parameters(); - for(int i = 0; i < N; ++i) - src.new_line() << "a.data[" << i << "][0] += " - << "b.data[" << i << "][0];"; - src.new_line() << "return a;"; - src.end_function(); - } - } const apply; -}; - -template -struct vex_sub { - typedef static_matrix vectorA; - typedef static_matrix vectorB; - - struct apply_type : vex::UserFunction { - apply_type() {} - - static std::string name() { - return "sub_" + vex::type_name() + "_" + vex::type_name(); - } - - static void define(vex::backend::source_generator &src, const std::string &fname = name()) { - src.begin_function(fname); - src.begin_function_parameters(); - src.parameter("a"); - src.parameter("b"); - src.end_function_parameters(); - for(int i = 0; i < N; ++i) - src.new_line() << "a.data[" << i << "][0] -= " - << "b.data[" << i << "][0];"; - src.new_line() << "return a;"; - src.end_function(); - } - } const apply; -}; - -template -struct vex_mul { - typedef static_matrix matrix; - typedef static_matrix vectorX; - typedef static_matrix vectorY; - - struct apply_type : vex::UserFunction { - apply_type() {} - - static std::string name() { - return "mul_" + vex::type_name() + "_" + vex::type_name(); - } - - static void define(vex::backend::source_generator &src, const std::string &fname = name()) { - src.begin_function(fname); - src.begin_function_parameters(); - src.parameter("a"); - src.parameter("b"); - src.end_function_parameters(); - src.new_line() << vex::type_name() << " c;"; - for(int i = 0; i < N; ++i) { - src.new_line() << "c.data[" << i << "][0] = "; - for(int j = 0; j < N; ++j) { - if (j) src << " + "; - src << "a.data[" << i << "][" << j << "] * b.data[" << j << "][0]"; - } - src << ";"; - } - src.new_line() << "return c;"; - src.end_function(); - } - } const apply; -}; - -template -struct spmv_impl, C, P>>, - vex::vector>, Beta, vex::vector>, - typename std::enable_if<(B > 1)>::type - > -{ - typedef vex::sparse::distributed, C, P>> matrix; - typedef vex::vector> vectorX; - typedef vex::vector> vectorY; - - static void apply(Alpha alpha, const matrix &A, const vectorX &x, Beta beta, vectorY &y) - { - if (beta) - y = vex_add().apply(vex_scale().apply(beta, y), vex_scale().apply(alpha, A * x)); - else - y = vex_convert().apply(vex_scale().apply(alpha, A * x)); - } -}; - - -template -struct spmv_impl>, - vex::vector, Beta, vex::vector, - typename std::enable_if< - (math::static_rows::value == 1) && ( - math::static_rows::value != math::static_rows::value || - math::static_rows::value != math::static_rows::value - ) - >::type> -{ - typedef vex::sparse::distributed> matrix; - typedef vex::vector vectorx; - typedef vex::vector vectory; - - static void apply(Alpha alpha, const matrix &A, const vectorx &x, Beta beta, vectory &y) - { - auto _x = x.template reinterpret::type>(); - auto _y = y.template reinterpret::type>(); - spmv(alpha, A, _x, beta, _y); - } -}; - -template -struct spmv_impl>, - vex::vector, Beta, vex::vector, - typename std::enable_if< - (math::static_rows::value > 1) && ( - math::static_rows::value != math::static_rows::value || - math::static_rows::value != math::static_rows::value - ) - >::type> -{ - typedef vex::sparse::distributed> matrix; - typedef vex::vector vectorx; - typedef vex::vector vectory; - - static void apply(Alpha alpha, const matrix &A, const vectorx &x, Beta beta, vectory &y) - { - const int B = math::static_rows::value; - auto _x = x.template reinterpret::type, B, 1>>(); - auto _y = y.template reinterpret::type, B, 1>>(); - spmv(alpha, A, _x, beta, _y); - } -}; - -template -struct residual_impl< - vex::sparse::distributed, C, P>>, - vex::vector>, - vex::vector>, - vex::vector>, - typename std::enable_if<(B > 1)>::type - > -{ - typedef vex::sparse::distributed, C, P>> matrix; - typedef vex::vector> vectorB; - typedef vex::vector> vectorX; - typedef vex::vector> vectorR; - - static void apply(const vectorB &rhs, const matrix &A, const vectorX &x, vectorR &r) - { - r = vex_convert().apply(vex_sub().apply(rhs, A * x)); - } -}; - -template -struct residual_impl< - vex::sparse::distributed, C, P>>, - vex::vector, - vex::vector, - vex::vector, - typename std::enable_if< - math::static_rows::value == 1 && - math::static_rows::value == 1 && - math::static_rows::value == 1 && - (B > 1) - >::type - > -{ - typedef vex::sparse::distributed, C, P>> matrix; - typedef vex::vector vectorB; - typedef vex::vector vectorX; - typedef vex::vector vectorR; - - static void apply(const vectorB &b, const matrix &A, const vectorX &x, vectorR &r) - { - typedef static_matrix VB; - typedef static_matrix VX; - typedef static_matrix VR; - - auto _b = b.template reinterpret(); - auto _x = x.template reinterpret(); - auto _r = r.template reinterpret(); - - _r = vex_convert().apply(vex_sub().apply(_b, A * _x)); - } -}; - -template < typename Alpha, typename Beta, typename TX, typename TY, typename TZ, int B > -struct vmul_impl< - Alpha, vex::vector< static_matrix >, - vex::vector< static_matrix >, - Beta, vex::vector< static_matrix > - > -{ - typedef vex::vector< static_matrix > matrix; - typedef vex::vector< static_matrix > vectorY; - typedef vex::vector< static_matrix > vectorZ; - - static void apply(Alpha a, const matrix &x, const vectorY &y, Beta b, vectorZ &z) - { - if (b) - z = vex_add().apply(vex_scale().apply(b, z), vex_scale().apply(a, vex_mul().apply(x, y))); - else - z = vex_convert().apply(vex_scale().apply(a, vex_mul().apply(x, y))); - } -}; - -template < typename T, int B > -struct clear_impl< vex::vector< static_matrix > > -{ - typedef static_matrix vector_value; - typedef vex::vector vector; - - static void apply(vector &x) { - x.template reinterpret() = 0; - } -}; - -template < typename T, int B > -struct copy_impl< - vex::vector< static_matrix >, - vex::vector< static_matrix > - > -{ - typedef vex::vector< static_matrix > vector; - - static void apply(const vector &x, vector &y) { - auto X = x.template reinterpret(); - auto Y = y.template reinterpret(); - Y = X; - } -}; - -template < typename A, typename B, typename TX, typename TY, int N > -struct axpby_impl< - A, vex::vector< static_matrix >, - B, vex::vector< static_matrix > - > -{ - typedef vex::vector< static_matrix > vectorX; - typedef vex::vector< static_matrix > vectorY; - - static void apply(A a, const vectorX &x, B b, vectorY &y) { - if (b) - y.template reinterpret() = - a * x.template reinterpret() + - b * y.template reinterpret(); - else - y.template reinterpret() = - a * x.template reinterpret(); - } -}; - -template < typename A, typename B, typename C, typename TX, typename TY, typename TZ, int N > -struct axpbypcz_impl< - A, vex::vector< static_matrix >, - B, vex::vector< static_matrix >, - C, vex::vector< static_matrix > - > -{ - typedef vex::vector< static_matrix > vectorX; - typedef vex::vector< static_matrix > vectorY; - typedef vex::vector< static_matrix > vectorZ; - - static void apply(A a, const vectorX &x, B b, const vectorY &y, C c, vectorZ &z) { - if (c) - z.template reinterpret() = - a * x.template reinterpret() + - b * y.template reinterpret() + - c * z.template reinterpret(); - else - z.template reinterpret() = - a * x.template reinterpret() + - b * y.template reinterpret(); - } -}; - -template < typename T, int B > -struct inner_product_impl< - vex::vector< static_matrix >, - vex::vector< static_matrix > - > -{ - typedef T return_type; - typedef static_matrix vector_value; - typedef vex::vector vector; - - static return_type get(const vector &x, const vector &y) { - vex::Reductor sum( x.queue_list() ); - return sum( x.template reinterpret() * y.template reinterpret() ); - } -}; - -namespace detail { - -template -struct common_scalar_backend< backend::vexcl, backend::vexcl, - typename std::enable_if< - math::static_rows::value != 1 || - math::static_rows::value != 1 - >::type> -{ - typedef typename math::scalar_of::type S1; - typedef typename math::scalar_of::type S2; - - typedef - typename std::conditional< - (sizeof(S1) > sizeof(S2)), backend::vexcl, backend::vexcl - >::type - type; -}; - -} // namespace detail -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/backend/viennacl.hpp b/src/solvers/amgcl/backend/viennacl.hpp deleted file mode 100644 index 73b993a..0000000 --- a/src/solvers/amgcl/backend/viennacl.hpp +++ /dev/null @@ -1,462 +0,0 @@ -#ifndef AMGCL_BACKEND_VIENNACL_HPP -#define AMGCL_BACKEND_VIENNACL_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/backend/viennacl.hpp - * \author Denis Demidov - * \brief ViennaCL backend. - */ - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace amgcl { - -namespace solver { - -/** Wrapper around solver::skyline_lu for use with the ViennaCL backend. - * Copies the rhs to the host memory, solves the problem using the host CPU, - * then copies the solution back to the compute device(s). - */ -template -struct viennacl_skyline_lu : solver::skyline_lu { - typedef solver::skyline_lu Base; - - mutable std::vector _rhs, _x; - - template - viennacl_skyline_lu(const Matrix &A, const Params&) - : Base(*A), _rhs(backend::rows(*A)), _x(backend::rows(*A)) - { } - - template - void operator()(const Vec1 &rhs, Vec2 &x) const { - viennacl::fast_copy(rhs, _rhs); - static_cast(this)->operator()(_rhs, _x); - viennacl::fast_copy(_x, x); - } -}; - -} - -namespace backend { - -/// ViennaCL backend -/** - * This is a backend that uses types defined in the ViennaCL library - * (http://viennacl.sourceforge.net). - * - * \param Matrix ViennaCL matrix to use with the backend. Possible choices are - * viannacl::compressed_matrix, viennacl::ell_matrix, and - * viennacl::hyb_matrix. - * \ingroup backends - */ -template < - class Matrix, - class DirectSolver = solver::viennacl_skyline_lu::type> - > -struct viennacl { - typedef typename backend::value_type::type value_type; - typedef ptrdiff_t index_type; - typedef ptrdiff_t col_type; - typedef ptrdiff_t ptr_type; - typedef Matrix matrix; - typedef ::viennacl::vector vector; - typedef ::viennacl::vector matrix_diagonal; - typedef DirectSolver direct_solver; - - struct provides_row_iterator : std::false_type {}; - - /// Backend parameters. - typedef amgcl::detail::empty_params params; - - static std::string name() { return "viennacl"; } - - /// Copy matrix from builtin backend. - static std::shared_ptr - copy_matrix( - std::shared_ptr< typename builtin::matrix > A, - const params& - ) - { - auto m = std::make_shared(); - ::viennacl::copy(viennacl_matrix_adapter(*A), *m); - return m; - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector(typename builtin::vector const &x, const params&) - { - auto v = std::make_shared(x.size()); - ::viennacl::fast_copy(x.data(), x.data() + x.size(), v->begin()); - return v; - } - - /// Copy vector from builtin backend. - static std::shared_ptr - copy_vector( - std::shared_ptr< typename builtin::vector > x, - const params &prm - ) - { - return copy_vector(*x, prm); - } - - /// Create vector of the specified size. - static std::shared_ptr - create_vector(size_t size, const params&) - { - return std::make_shared(size); - } - - /// Create direct solver for coarse level - static std::shared_ptr - create_solver(std::shared_ptr< typename builtin::matrix > A, const params &prm) - { - return std::make_shared(A, prm); - } - - private: - struct viennacl_matrix_adapter { - typedef ptrdiff_t index_type; - typedef size_t size_type; - - class const_iterator1; - - class const_iterator2 { - public: - bool operator!=(const const_iterator2 &it) const { - return pos != it.pos; - } - - const const_iterator2& operator++() { - ++pos; - return *this; - } - - index_type index1() const { - return row; - } - - index_type index2() const { - return col[pos]; - } - - value_type operator*() const { - return val[pos]; - } - private: - const_iterator2(index_type row, index_type pos, - const index_type *col, const value_type *val) - : row(row), pos(pos), col(col), val(val) - { } - - index_type row; - index_type pos; - const index_type *col; - const value_type *val; - - friend class const_iterator1; - }; - - class const_iterator1 { - public: - bool operator!=(const const_iterator1 &it) const { - return pos != it.pos; - } - - const const_iterator1& operator++() { - ++pos; - return *this; - } - - index_type index1() const { - return pos; - } - - const const_iterator2 begin() const { - return const_iterator2(pos, row[pos], col, val); - } - - const const_iterator2 end() const { - return const_iterator2(pos, row[pos + 1], col, val); - } - private: - const_iterator1(index_type pos, - const index_type *row, - const index_type *col, - const value_type *val - ) - : pos(pos), row(row), col(col), val(val) - { } - - index_type pos; - const index_type *row; - const index_type *col; - const value_type *val; - - friend struct viennacl_matrix_adapter; - }; - - viennacl_matrix_adapter( - const typename backend::builtin::matrix &A) - : rows(A.nrows), cols(A.ncols), - row(A.ptr), col(A.col), val(A.val) - { } - - const_iterator1 begin1() const { - return const_iterator1(0, row, col, val); - } - - const_iterator1 end1() const { - return const_iterator1(rows, row, col, val); - } - - size_t size1() const { - return rows; - } - - size_t size2() const { - return cols; - } - private: - size_t rows; - size_t cols; - - const index_type *row; - const index_type *col; - const value_type *val; - }; -}; - -template -struct is_viennacl_matrix : std::false_type {}; - -template -struct is_viennacl_matrix< ::viennacl::compressed_matrix > : std::true_type -{}; - -template -struct is_viennacl_matrix< ::viennacl::hyb_matrix > : std::true_type -{}; - -template -struct is_viennacl_matrix< ::viennacl::ell_matrix > : std::true_type -{}; - -template -struct value_type< - M, - typename std::enable_if< is_viennacl_matrix::value >::type - > -{ - typedef typename M::value_type::value_type type; -}; - -template -struct value_type< ::viennacl::vector > -{ - typedef V type; -}; - -template -struct rows_impl< - M, - typename std::enable_if< is_viennacl_matrix::value >::type - > -{ - static size_t get(const M &A) { - return A.size1(); - } -}; - -template -struct cols_impl< - M, - typename std::enable_if< is_viennacl_matrix::value >::type - > -{ - static size_t get(const M &A) { - return A.size2(); - } -}; - -template -struct nonzeros_impl< ::viennacl::compressed_matrix > { - static size_t get(const ::viennacl::compressed_matrix &A) { - return A.nnz(); - } -}; - -template -struct nonzeros_impl< ::viennacl::ell_matrix > { - static size_t get(const ::viennacl::ell_matrix &A) { - return A.nnz(); - } -}; - -template -struct nonzeros_impl< ::viennacl::hyb_matrix > { - static size_t get(const ::viennacl::hyb_matrix &A) { - return A.ell_nnz() + A.csr_nnz(); - } -}; - -template -struct spmv_impl< - Alpha, Mtx, Vec, Beta, Vec, - typename std::enable_if< is_viennacl_matrix::value >::type - > -{ - static void apply(Alpha alpha, const Mtx &A, const Vec &x, Beta beta, Vec &y) - { - if (beta) - y = alpha * ::viennacl::linalg::prod(A, x) + beta * y; - else - y = alpha * ::viennacl::linalg::prod(A, x); - } -}; - -template -struct residual_impl< - Mtx, Vec, Vec, Vec, - typename std::enable_if< is_viennacl_matrix::value >::type - > -{ - typedef typename value_type::type V; - - static void apply(const Vec &rhs, const Mtx &A, const Vec &x, Vec &r) - { - r = ::viennacl::linalg::prod(A, x); - r = rhs - r; - } -}; - -template < typename V > -struct clear_impl< ::viennacl::vector > -{ - static void apply(::viennacl::vector &x) - { - x.clear(); - } -}; - -template < typename V > -struct inner_product_impl< - ::viennacl::vector, - ::viennacl::vector - > -{ - static V get(const ::viennacl::vector &x, const ::viennacl::vector &y) - { - return ::viennacl::linalg::inner_prod(x, y); - } -}; - -template < typename A, typename B, typename V > -struct axpby_impl< - A, ::viennacl::vector, - B, ::viennacl::vector - > -{ - static void apply( - A a, const ::viennacl::vector &x, - B b, ::viennacl::vector &y - ) - { - if (b) - y = a * x + b * y; - else - y = a * x; - } -}; - -template < typename A, typename B, typename C, typename V > -struct axpbypcz_impl< - A, ::viennacl::vector, - B, ::viennacl::vector, - C, ::viennacl::vector - > -{ - static void apply( - A a, const ::viennacl::vector &x, - B b, const ::viennacl::vector &y, - C c, ::viennacl::vector &z - ) - { - if (c) - z = a * x + b * y + c * z; - else - z = a * x + b * y; - } -}; - -template < typename A, typename B, typename V > -struct vmul_impl< - A, ::viennacl::vector, ::viennacl::vector, - B, ::viennacl::vector - > -{ - static void apply( - A a, const ::viennacl::vector &x, const ::viennacl::vector &y, - B b, ::viennacl::vector &z) - { - if (b) - z = a * ::viennacl::linalg::element_prod(x, y) + b * z; - else - z = a * ::viennacl::linalg::element_prod(x, y); - } -}; - -template < typename V > -struct copy_impl< - ::viennacl::vector, - ::viennacl::vector - > -{ - static void apply(const ::viennacl::vector &x, ::viennacl::vector &y) - { - y = x; - } -}; - -} // namespace backend -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/coarsening/aggregation.hpp b/src/solvers/amgcl/coarsening/aggregation.hpp deleted file mode 100644 index e7d490c..0000000 --- a/src/solvers/amgcl/coarsening/aggregation.hpp +++ /dev/null @@ -1,164 +0,0 @@ -#ifndef AMGCL_COARSENING_AGGREGATION_HPP -#define AMGCL_COARSENING_AGGREGATION_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/coarsening/aggregation.hpp - * \author Denis Demidov - * \brief Non-smoothed aggregation coarsening. - */ - -#include -#include - -#include -#include -#include -#include -#include - -namespace amgcl { - -/// Coarsening strategies -namespace coarsening { - -/** - * \defgroup coarsening Coarsening strategies - * \brief Coarsening strategies for AMG hirarchy construction. - * - * A coarsener in AMGCL is a class that takes a system matrix and returns three - * operators: - * - * 1. Restriction operator R that downsamples the residual error to a - * coarser level in AMG hierarchy, - * 2. Prolongation operator P that interpolates a correction computed on a - * coarser grid into a finer grid, - * 3. System matrix \f$A^H\f$ at a coarser level that is usually computed as a - * Galerkin operator \f$A^H = R A^h P\f$. - * - * The AMG hierarchy is constructed by recursive invocation of the selected - * coarsener. - */ - -/// Non-smoothed aggregation. -/** - * \ingroup coarsening - */ -template -struct aggregation { - typedef pointwise_aggregates Aggregates; - - /// Coarsening parameters. - struct params { - /// Aggregation parameters. - Aggregates::params aggr; - - /// Near nullspace parameters. - nullspace_params nullspace; - - /// Over-interpolation factor \f$\alpha\f$. - /** - * In case of aggregation coarsening, coarse-grid - * correction of smooth error, and by this the overall convergence, can - * often be substantially improved by using "over-interpolation", that is, - * by multiplying the actual correction (corresponding to piecewise - * constant interpolation) by some factor \f$\alpha > 1\f$. Equivalently, - * this means that the coarse-level Galerkin operator is re-scaled by - * \f$1 / \alpha\f$: - * \f[I_h^HA_hI_H^h \to \frac{1}{\alpha}I_h^HA_hI_H^h.\f] - * - * \sa \cite Stuben1999, Section 9.1 "Re-scaling of the Galerkin operator". - */ - float over_interp; - - params() - : over_interp(math::static_rows::value == 1 ? 1.5f : 2.0f) - {} - -#ifndef AMGCL_NO_BOOST - params(const boost::property_tree::ptree &p) - : AMGCL_PARAMS_IMPORT_CHILD(p, aggr), - AMGCL_PARAMS_IMPORT_CHILD(p, nullspace), - AMGCL_PARAMS_IMPORT_VALUE(p, over_interp) - { - check_params(p, {"aggr", "nullspace", "over_interp"}); - } - - void get(boost::property_tree::ptree &p, const std::string &path) const { - AMGCL_PARAMS_EXPORT_CHILD(p, path, aggr); - AMGCL_PARAMS_EXPORT_CHILD(p, path, nullspace); - AMGCL_PARAMS_EXPORT_VALUE(p, path, over_interp); - } -#endif - } prm; - - aggregation(const params &prm = params()) : prm(prm) {} - - /// Creates transfer operators for the given system matrix. - /** - * \param A The system matrix. - * \param prm Coarsening parameters. - * \returns A tuple of prolongation and restriction operators. - */ - template - std::tuple< - std::shared_ptr, - std::shared_ptr - > - transfer_operators(const Matrix &A) { - const size_t n = rows(A); - - AMGCL_TIC("aggregates"); - Aggregates aggr(A, prm.aggr, prm.nullspace.cols); - AMGCL_TOC("aggregates"); - - AMGCL_TIC("interpolation"); - auto P = tentative_prolongation( - n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size - ); - AMGCL_TOC("interpolation"); - - return std::make_tuple(P, transpose(*P)); - } - - /// Creates system matrix for the coarser level. - /** - * \param A The system matrix at the finer level. - * \param P Prolongation operator returned by transfer_operators(). - * \param R Restriction operator returned by transfer_operators(). - * \returns System matrix for the coarser level. - */ - template - std::shared_ptr - coarse_operator(const Matrix &A, const Matrix &P, const Matrix &R) const { - return detail::scaled_galerkin(A, P, R, 1 / prm.over_interp); - } -}; - -} // namespace coarsening -} // namespace amgcl - -#endif diff --git a/src/solvers/amgcl/coarsening/as_scalar.hpp b/src/solvers/amgcl/coarsening/as_scalar.hpp deleted file mode 100644 index 218a2dc..0000000 --- a/src/solvers/amgcl/coarsening/as_scalar.hpp +++ /dev/null @@ -1,119 +0,0 @@ -#ifndef AMGCL_COARSENING_AS_SCALAR_HPP -#define AMGCL_COARSENING_AS_SCALAR_HPP - -/* -The MIT License - -Copyright (c) 2012-2022 Denis Demidov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** - * \file amgcl/coarsening/as_scalar.hpp - * \author Denis Demidov - * \brief Scalar coarsening for block matrices. - */ - -#include -#include -#include - -namespace amgcl { -namespace coarsening { - -// Takes a block matrix as input, -// converts it to the scalar format, -// applies the base coarsening, -// converts the results back to block format. -template