hpsim · greole · Aug 28, 2024 · Aug 9, 2024 · Aug 9, 2024 · Aug 15, 2024
diff --git a/.reuse/dep5 b/.reuse/dep5
@@ -43,6 +43,10 @@ Files: unitTests/case/*
 Copyright: 2024 OGL authors
 License: GPL-3.0-or-later
 
+Files: unitTests/MatrixWrapper/data/*mtx
+Copyright: 2024 OGL authors
+License: GPL-3.0-or-later
+
 # Sample paragraph, commented out:
 #
 # Files: src/*

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -222,9 +222,10 @@ target_sources(
           src/DevicePersistent/DeviceIdGuard/DeviceIdGuard.C
           src/DevicePersistent/ExecutorHandler/ExecutorHandler.C
           src/MatrixWrapper/HostMatrix.C
-          src/Solver/CG/GKOCG.C
-          src/Solver/BiCGStab/GKOBiCGStab.C
-          src/Solver/GMRES/GKOGMRES.C
+          src/MatrixWrapper/SparsityPattern.C
+          src/Repartitioner.C
+  # src/Solver/CG/GKOCG.C src/Solver/BiCGStab/GKOBiCGStab.C
+  # src/Solver/GMRES/GKOGMRES.C
 )
 
 enable_sanitizers(

diff --git a/cmake/ginkgo.cmake b/cmake/ginkgo.cmake
@@ -10,8 +10,7 @@ if(NOT ${OGL_USE_EXTERNAL_GINKGO})
     QUITE
     GIT_SHALLOW ON
     GIT_REPOSITORY "https://github.com/ginkgo-project/ginkgo.git"
-    GIT_TAG ${GINKGO_CHECKOUT_VERSION}
-    )
+    GIT_TAG ${GINKGO_CHECKOUT_VERSION})
 
   FetchContent_MakeAvailable(Ginkgo)
 endif()
diff --git a/cmake/version.cmake b/cmake/version.cmake
@@ -19,10 +19,8 @@ if("${GIT_REV}" STREQUAL "")
 else()
   execute_process(COMMAND bash -c "git diff --quiet --exit-code || echo +"
                   OUTPUT_VARIABLE GIT_DIFF)
-  execute_process(
-    COMMAND git describe --exact-match --tags
-    OUTPUT_VARIABLE GIT_TAG
-    ERROR_QUIET)
+  execute_process(COMMAND git describe --exact-match --tags
+                  OUTPUT_VARIABLE GIT_TAG ERROR_QUIET)
   execute_process(COMMAND git rev-parse --abbrev-ref HEAD
                   OUTPUT_VARIABLE GIT_BRANCH)
 

diff --git a/include/OGL/CommunicationPattern.H b/include/OGL/CommunicationPattern.H
@@ -129,6 +129,23 @@ struct CommunicationPattern {
         // ASSERT_EQ(target_ids.get_size(), send_idxs.size());
     }
 
+    /* constructs from target ranks and the send indices (rows) per rank */
+    CommunicationPattern(const ExecutorHandler &exec,
+                         const std::vector<label> &ranks,
+                         const std::vector<std::vector<label>> &rows)
+        : exec_handler(exec),
+          target_ids(exec.get_ref_exec(), ranks.begin(), ranks.end()),
+          target_sizes(exec.get_ref_exec(), ranks.size())
+    {
+        ASSERT_EQ(ranks.size(), rows.size());
+        for (std::size_t i = 0; i < rows.size(); i++) {
+            target_sizes.get_data()[i] = rows[i].size();
+            gko::array<label> row_array(exec.get_ref_exec(), rows[i].begin(),
+                                        rows[i].end());
+            send_idxs.push_back({row_array, ranks[i]});
+        }
+    }
+
     const gko::experimental::mpi::communicator &get_comm() const
     {
         return *exec_handler.get_communicator().get();

diff --git a/include/OGL/DevicePersistent/ExecutorHandler.H b/include/OGL/DevicePersistent/ExecutorHandler.H
@@ -187,6 +187,8 @@ public:
     {
         return this->device_comm_;
     }
+
+    label get_rank() const { return get_communicator().get()->rank(); }
 };
 
 using PersistentExecutor = ExecutorHandler;

diff --git a/include/OGL/DevicePersistent/Vector.H b/include/OGL/DevicePersistent/Vector.H
@@ -153,9 +153,10 @@ public:
 
         // TODO why does it need its size
         // repartitioner size should be sufficient
-        auto target_vector = dist_vec::create(exec_.get_ref_exec(), *comm.get(),
-                                              gko::dim<2>{global_size, 1},
-                                              gko::dim<2>{local_size, 1}, 1);
+        auto target_vector = dist_vec::create(
+            exec_.get_ref_exec(), *comm.get(),
+            gko::dim<2>{static_cast<gko::size_type>(global_size), 1},
+            gko::dim<2>{static_cast<gko::size_type>(local_size), 1}, 1);
         scalar *host_buffer = host_device_vector->get_local_values();
 
         auto host_buffer_view =

diff --git a/include/OGL/MatrixWrapper/Combination.H b/include/OGL/MatrixWrapper/Combination.H
@@ -66,7 +66,7 @@ public:
         return *this;
     }
 
-     /**
+    /**
      * Returns a list of coefficients of the combination.
      *
      * @return a list of coefficients
@@ -113,7 +113,7 @@ public:
     /*
      * @warning assumes that all entries are unique
      */
-    void convert_to(gko::matrix::Csr<scalar, label> *result) const override 
+    void convert_to(gko::matrix::Csr<scalar, label> *result) const override
     {
         auto exec = this->get_executor();
         auto operators = this->get_operators();
@@ -135,7 +135,7 @@ public:
     /*
      * @warning assumes that all entries are unique
      */
-    void move_to(gko::matrix::Coo<scalar, label> *result) override 
+    void move_to(gko::matrix::Coo<scalar, label> *result) override
     {
         this->convert_to(result);
         this->comb_ = nullptr;
@@ -145,7 +145,7 @@ public:
     /*
      * @warning assumes that all entries are unique
      */
-    void move_to(gko::matrix::Csr<scalar, label> *result) override 
+    void move_to(gko::matrix::Csr<scalar, label> *result) override
     {
         this->convert_to(result);
         this->comb_ = nullptr;

diff --git a/include/OGL/MatrixWrapper/SparsityPattern.H b/include/OGL/MatrixWrapper/SparsityPattern.H
@@ -6,8 +6,23 @@
 
 #include <vector>
 
+#include "OGL/CommunicationPattern.H"
+#include "OGL/common.H"
+
 namespace Foam {
 
+/* @brief makes the ldu_mapping consecutive based on a comm_pattern
+ *
+ * After merging sparsity patterns during repartitioning the ldu mappings are
+ * not consecutive, i.e. they could look like [0 1 2 3 | 0 1 2 3 | 0 1], where
+ * | is the former rank boundary. Based on the comm pattern a new mapping
+ * [0 1 2 3 | 4 5 6 7 | 8 9] is computed, where the offsets [0 | 4 | 8] are
+ * based on the recv_counts.
+ *
+ * */
+void make_ldu_mapping_consecutive(const AllToAllPattern &comm_pattern,
+                                  gko::array<label> &ldu_mapping, label rank,
+                                  label ranks_per_gpu);
 
 /* The SparsityPattern holds row and column index data using gko::arrays.
  * This struct is used for easy generation of (distributed) ginkgo matrices.
@@ -17,6 +32,15 @@ namespace Foam {
  *
  * */
 struct SparsityPattern {
+    /* constructor from vectors, assumes a single interface */
+    SparsityPattern(std::vector<label> rows, std::vector<label> cols,
+                    std::vector<label> mapping, std::vector<label> rank)
+        : num_nnz(rows.size())
+    {
+        // NOTE not fully implemented: always aborts with a FatalError
+        FatalErrorInFunction << "Not implemented" << abort(FatalError);
+    }
+
     SparsityPattern(std::shared_ptr<const gko::Executor> exec, const label size)
         : num_nnz(size),
           row_idxs{exec, static_cast<gko::size_type>(size)},
@@ -56,6 +80,7 @@ struct SparsityPattern {
         rank = ranks;
     }
 
+    // TODO rename num_nnz -> nnz, the num_ prefix is redundant
     const label num_nnz;
 
     mutable gko::Array<label> row_idxs;

diff --git a/include/OGL/Repartitioner.H b/include/OGL/Repartitioner.H
@@ -0,0 +1,176 @@
+// SPDX-FileCopyrightText: 2024 OGL authors
+//
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <ginkgo/ginkgo.hpp>
+
+#include "fvCFD.H"
+
+#include "CommunicationPattern.H"
+#include "MatrixWrapper/SparsityPattern.H"
+
+template <typename T>
+std::vector<T> apply_permutation(const std::vector<T> &vec,
+                                 const std::vector<label> &p)
+{
+    std::vector<T> sorted_vec(vec.size());  // sorted_vec[k] = vec[p[k]]
+    std::transform(p.begin(), p.end(), sorted_vec.begin(),
+                   [&](label i) { return vec[i]; });
+    return sorted_vec;
+}
+
+/* returns the index permutation that stably sorts vec according to compare */
+template <typename T, typename Compare>
+std::vector<label> sort_permutation(const std::vector<T> &vec, Compare compare)
+{
+    std::vector<label> p(vec.size());
+    std::iota(p.begin(), p.end(), 0);
+    std::stable_sort(p.begin(), p.end(),
+                     [&](label i, label j) { return compare(vec[i], vec[j]); });
+    return p;
+}
+
+/** @brief Collects functions to repartition communication patterns
+ * and matrices. Here repartitioning refers to changing a given partitioning on
+ * n ranks to m ranks with n>=m
+ */
+class Repartitioner {
+private:
+    using partition = gko::experimental::distributed::Partition<label, label>;
+
+    const label size_;  //!< Size (n matrix rows) before repartitioning
+
+    const label repart_size_;  //!< Size after repartitioning
+
+    const label ranks_per_gpu_;  //!< every ranks_per_gpu-th rank is an owner
+
+    const label verbose_;  //!< verbosity setting passed to the constructor
+
+    std::shared_ptr<const partition> orig_partition_;
+
+public:
+    /* @param size local size (n matrix rows) before repartitioning */
+    Repartitioner(label size, label ranks_per_gpu, label verbose,
+                  const ExecutorHandler &exec_handler)
+        : size_(size),
+          repart_size_(compute_repart_size(size, ranks_per_gpu, exec_handler)),
+          ranks_per_gpu_(ranks_per_gpu),
+          verbose_(verbose),
+          orig_partition_(gko::share(
+              gko::experimental::distributed::build_partition_from_local_size<
+                  label, label>(exec_handler.get_ref_exec(),
+                                *exec_handler.get_communicator().get(),
+                                size))){}
+
+    /* returns the owner rank for a given rank */
+    label get_owner_rank(label rank) const
+    {
+        return compute_owner_rank(rank, ranks_per_gpu_);
+    }
+
+    /* returns the owner rank of the rank reported by exec_handler */
+    label get_owner_rank(const ExecutorHandler &exec_handler) const
+    {
+        return get_owner_rank(exec_handler.get_rank());
+    }
+
+    /* returns true if the current rank is its own owner rank */
+    bool is_owner(const ExecutorHandler &exec_handler) const
+    {
+        return exec_handler.get_rank() ==
+               get_owner_rank(exec_handler.get_rank());
+    }
+
+    /* @brief checks if the given rank gets local after repartitioning, i.e.
+     * if it has the same owner rank as the current rank
+     * */
+    bool reparts_to_local(const ExecutorHandler &exec_handler, label rank) const
+    {
+        return get_owner_rank(exec_handler) ==
+               get_owner_rank(rank);
+    }
+
+    label get_ranks_per_gpu() const { return ranks_per_gpu_; }
+
+    /* @brief computes the size of the submatrix owned by a given rank after
+     * repartitioning
+     *
+     * Given a local size (can represent nrows or nnzs) the new size of
+     * this rank after repartitioning gets computed
+     */
+    static label compute_repart_size(label local_size, label ranks_per_gpu,
+                                     const ExecutorHandler &exec_handler);
+
+    label get_repart_size() const { return repart_size_; }
+
+    // TODO pass original dim as argument, if size can also be nnz
+    gko::dim<2> get_repart_dim() const
+    {
+        return gko::dim<2>{static_cast<gko::size_type>(repart_size_),
+                           static_cast<gko::size_type>(repart_size_)};
+    }
+
+    std::shared_ptr<const partition> get_orig_partition() const
+    {
+        return orig_partition_;
+    }
+
+
+    /* @brief given received interfaces this function sorts interfaces into
+     * local and non local. Returns the SparsityPattern storing the new
+     * non_local sparsity pattern and a vector of bools. TODO confirm whether
+     * an entry flags that the interface performs a local communication or
+     * that it was originally from this rank (only one bool per entry)
+     */
+    std::pair<SparsityPattern, std::vector<bool>> build_non_local_interfaces(
+        const ExecutorHandler &exec_handler, SparsityPattern &loc,
+        const SparsityPattern &non_loc) const;
+
+
+    /* @brief function to repartition a sparsity pattern
+     *
+     * function to repartition a sparsity pattern based on ranks_per_gpu
+     * ie changing a sparsity pattern which has rows on each rank to a sparsity
+     * pattern which has rows only on every ranks_per_gpu-th rank (owner)
+     *
+     * @param exec_handler the executor handler
+     * @param src_local_pattern the original sparsity pattern of the local
+     * matrix
+     * @param src_non_local_pattern the original sparsity pattern of the non
+     * local matrix
+     * @return the new sparsity patterns (local and non-local) and a vector
+     * tracking the interfaces. Each entry of this vector is a pair where the
+     * first entry (bool) signals whether this is a new local interface (no
+     * communication), and the second entry (label) tracks the original rank
+     * of the interface
+     *
+     */
+    std::tuple<std::shared_ptr<SparsityPattern>,
+               std::shared_ptr<SparsityPattern>,
+               std::vector<std::pair<bool, label>>>
+    repartition_sparsity(
+        const ExecutorHandler &exec_handler,
+        std::shared_ptr<const SparsityPattern> src_local_pattern,
+        std::shared_ptr<const SparsityPattern> src_non_local_pattern) const;
+
+    /* @brief repartition a communication pattern, i.e. change a communication
+     * pattern which has rows on each rank and where every rank communicates to
+     * a communication pattern which has rows only on every ranks_per_gpu-th
+     * rank and thus only communicates between owner ranks
+     *
+     * @param exec_handler the executor handler
+     * @param src_comm_pattern the original communication pattern of the
+     * current rank belonging to the distributed matrix
+     * @param partition the original partition belonging to the distributed
+     * matrix
+     * @return the repartitioned communication pattern
+     */
+    std::shared_ptr<const CommunicationPattern> repartition_comm_pattern(
+        const ExecutorHandler &exec_handler,
+        std::shared_ptr<
+            const gko::experimental::distributed::Partition<label, label>>
+            partition) const;
+};
diff --git a/include/OGL/common.H b/include/OGL/common.H
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
+#include <ginkgo/ginkgo.hpp>
 
 #include "fvCFD.H"
 #include "regIOobject.H"
@@ -12,7 +13,6 @@
 #endif
 
 #include <string.h>
-#include <ginkgo/ginkgo.hpp>
 
 namespace Foam {
 
@@ -100,6 +100,9 @@ std::ostream &operator<<(
     std::ostream &os,
     const std::shared_ptr<gko::matrix::Dense<scalar>> array_in);
 
+std::ostream &operator<<(std::ostream &os, const std::vector<label> &in);
+
+
 void export_system(const word fieldName, const gko::matrix::Csr<scalar> *A,
                    const gko::matrix::Dense<scalar> *x,
                    const gko::matrix::Dense<scalar> *b, const word time);