hpsim · greole · Aug 9, 2024 · Aug 9, 2024 · Aug 15, 2024 · Aug 15, 2024
diff --git a/include/OGL/DevicePersistent/ExecutorHandler.H b/include/OGL/DevicePersistent/ExecutorHandler.H
@@ -176,22 +176,21 @@ public:
  MPI_COMM_WORLD, gko_force_host_buffer_);
  }
 
- std::shared_ptr<gko::experimental::mpi::communicator>
+ std::shared_ptr<const gko::experimental::mpi::communicator>
  get_gko_mpi_device_comm() const
  {
  return this->device_comm_;
  }
 
- std::shared_ptr<gko::experimental::mpi::communicator> get_communicator()
-   const
+ std::shared_ptr<const gko::experimental::mpi::communicator>
+ get_communicator() const
  {
  return this->device_comm_;
  }
 
- label get_rank() const { return get_communicator().get()->rank(); };
+ label get_rank() const { return get_communicator()->rank(); };
 };
 
 using PersistentExecutor = ExecutorHandler;
 
 } // namespace Foam
-// namespace Foam
diff --git a/include/OGL/MatrixWrapper/SparsityPattern.H b/include/OGL/MatrixWrapper/SparsityPattern.H
@@ -15,32 +15,61 @@ namespace Foam {
  *
  * After merging sparsity patterns during repartitioning the ldu mapping are
  * not consecutive ie they could look like [0 1 2 3 | 0 1 2 3 | 0 1], where
- * | is the former rank boundary based on the comm pattern we compute a new
+ * | is the former rank boundary. Based on the comm pattern we compute a new
  * mapping as [ 0 1 2 3 | 4 5 6 7| 8 9 ] where the following offsets [ 0 |
  * 4 | 8 ] based on the recv_counts are used.
  *
  * */
 void make_ldu_mapping_consecutive(const AllToAllPattern &comm_pattern,
- gko::array<label> &ldu_mapping, label rank,
+ std::vector<label> &ldu_mapping, label rank,
  label ranks_per_gpu);
 
+/* @brief computes the dimensions of a square matrix from a row index array
+ *
+ * @note assumes that rows are ordered
+ * @param rows ordered array of row indices
+ * @return dimensions {n, n}, where n is the last row index plus one
+ * */
+gko::dim<2> compute_dimensions(const std::vector<label> &rows);
+
+
 /* The SparsityPattern holds row and column index data using gko::arrays.
  * This struct is used for easy generation of (distributed) ginkgo matrices.
  *
  * Additionally it keeps track which parts of the sparsity pattern belongs to
  * which interface.
- *
  * */
 struct SparsityPattern {
  /* constructor from vectors, assumes a single interface */
- SparsityPattern(std::vector<label> rows, std::vector<label> cols,
- std::vector<label> mapping, std::vector<label> rank)
- : num_nnz(rows.size())
+ SparsityPattern(std::shared_ptr<const gko::Executor> exec, gko::dim<2> dim_,
+ const std::vector<label> &rows,
+ const std::vector<label> &cols,
+ const std::vector<label> &mapping,
+ const std::vector<gko::span> &spans_,
+ const std::vector<label> &rank_)
+ : num_nnz(rows.size()),
+ row_idxs(exec, rows.begin(), rows.end()),
+ col_idxs(exec, cols.begin(), cols.end()),
+ ldu_mapping(exec, mapping.begin(), mapping.end()),
+ dim(dim_),
+ spans(spans_),
+ rank(rank_)
  {
- // NOT fully implementented
- FatalErrorInFunction << "Not implemented" << abort(FatalError);
+ // For every rank there should be a span
+ ASSERT_EQ(spans.size(), rank.size());
+
+ ASSERT_EQ(rows.size(), cols.size());
+ ASSERT_EQ(rows.size(), mapping.size());
  }
 
+ SparsityPattern(std::shared_ptr<const gko::Executor> exec)
+ : num_nnz(0),  // empty pattern: no entries
+ dim(gko::dim<2>{}),  // value-initialized dimensions
+ row_idxs{exec},  // the three index members are built from exec alone
+ col_idxs{exec},
+ ldu_mapping{exec}
+ {}  // spans and rank are not set by this constructor
+
  SparsityPattern(std::shared_ptr<const gko::Executor> exec, const label size)
  : num_nnz(size),
  row_idxs{exec, static_cast<gko::size_type>(size)},
@@ -96,10 +125,14 @@ struct SparsityPattern {
 
  // A vector of spans indicating begin and end of each interface
  // this is used to keep all col and row idx consecutive in memory
+ // TODO this can be refactored, since its original motivation
+ // was to support the distributed_read function, which is not
+ // needed anymore. However, removing would require row and col_idxs to
+ // be stored in a std::vector<gko::Array>
  mutable std::vector<gko::span> spans;
 
- // keep track of original (communication) rank of each interface or matrix
- // block
+ // keep track of original (local sparsity) or communication (non-local) rank
+ // of each interface or matrix block
  mutable std::vector<label> rank;
 };
 

diff --git a/include/OGL/Repartitioner.H b/include/OGL/Repartitioner.H
@@ -11,27 +11,76 @@
 #include "CommunicationPattern.H"
 #include "MatrixWrapper/SparsityPattern.H"
 
-template <typename T>
-std::vector<T> apply_permutation(const std::vector<T> vec,
- const std::vector<label> &p)
+namespace detail {
+
+/* Convert to global
+** Given a vector of column indices local to the communication rank
+** this function returns a copy in which every index is offset
+** @param partition its range bounds, indexed by rank, provide the offsets
+** @param idx the indices which need to be converted from local to global ids
+** @param spans start and ends of the interfaces
+** @param ranks the rank to which the interface index is a local row index
+*/
+inline std::vector<label> convert_to_global(
+ std::shared_ptr<
+ const gko::experimental::distributed::Partition<label, label>>
+ partition,
+ const std::vector<label> &idx, const std::vector<gko::span> &spans,
+ const std::vector<label> &ranks)
 {
- std::vector<T> sorted_vec(vec.size());
- std::transform(p.begin(), p.end(), sorted_vec.begin(),
- [&](label i) { return vec[i]; });
- return sorted_vec;
+ std::vector<label> ret;
+ ret.reserve(idx.size());
+
+ for (std::size_t i = 0; i < ranks.size(); i++) {
+ // all entries of interface i are shifted by the range bound that the
+ // partition stores for the rank of this interface
+ const label offset = partition->get_range_bounds()[ranks[i]];
+ for (auto j = spans[i].begin; j < spans[i].end; j++) {
+ ret.push_back(idx[j] + offset);
+ }
+ }
+ return ret;
 }
 
-template <typename T, typename Compare>
-std::vector<label> sort_permutation(const std::vector<T> &vec, Compare compare)
+/* Convert to local
+** Given a vector of indices this function subtracts the range bound that
+** the partition stores for the given rank from every entry
+** @param partition its range bounds, indexed by rank, provide the offset
+** @param in the indices which need to be converted from global to local
+** ids, modified in place
+** @param rank the rank whose range bound is subtracted
+*/
+inline void convert_to_local(
+ std::shared_ptr<
+ const gko::experimental::distributed::Partition<label, label>>
+ partition,
+ std::vector<label> &in, label rank)
 {
- std::vector<label> p(vec.size());
- std::iota(p.begin(), p.end(), 0);
- std::stable_sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) {
- return compare(vec[i], vec[j]);
- });
- return p;
+ const label offset = partition->get_range_bounds()[rank];
+ std::transform(in.begin(), in.end(), in.begin(),
+ [offset](label idx) { return idx - offset; });
 }
 
+/* @brief exchange spans and rank between owner and non-owner ranks
+**
+** returns a tuple holding a vector of spans and two vectors of labels
+*/
+std::tuple<std::vector<gko::span>, std::vector<label>, std::vector<label>>
+exchange_span_ranks(const ExecutorHandler &exec_handler, label ranks_per_gpu,
+ const std::vector<gko::span> &spans,
+ const std::vector<label> &ranks);
+
+template <typename T>
+std::vector<T> apply_permutation(const std::vector<T> vec,
+ const std::vector<label> &p);
+
+/* Sorts a vector and returns the permutation for reuse
+ */
+template <typename T, typename Compare>
+std::vector<label> sort_permutation(const std::vector<T> &vec, Compare compare);
+
+} // namespace detail
+
 /** @class Collects functions to repartition communication patterns
  * and matrices. Here repartitioning refers to changing a given partitioning on
  * n ranks to m ranks with n>=m
@@ -40,9 +89,9 @@ class Repartitioner {
 private:
  using partition = gko::experimental::distributed::Partition<label, label>;
 
- const label size_; //! Size (n matrix rows) before repartitioning
+ const label size_; //! Size (matrix rows) before repartitioning
 
- const label repart_size_; //! Size after repartitioning
+ const label repart_size_; //! Size (matrix rows) after repartitioning
 
  const label ranks_per_gpu_;
 
@@ -115,19 +164,30 @@ public:
  std::shared_ptr<const partition> get_orig_partition() const
  {
  return orig_partition_;
- };
-
+ }
 
- /* @brief given received interfaces this function sorts interfaces into
- *local and non local returns: SparsityPattern storing new non_local
- *sparsity pattern, and a pair of locality information, where the first bool
- *stores if the interface performs a local communication and the second bool
- *if the the interface was originally from this rank
+ /* @brief given gathered sparsity information copy local interfaces
+ *
+ * After gathering local and non-local rows, cols, and mapping
+ * we have interfaces that are now local in the non-local rows, cols and
+ * mapping. Thus this function copies the rows, cols and mapping, spans etc
+ * to the corresponding local vectors.
+ * returns a vector of locality information.
+ * @param non_local_cols vector in comm rank local indices
  */
- std::pair<SparsityPattern, std::vector<bool>> build_non_local_interfaces(
- const ExecutorHandler &exec_handler, SparsityPattern &loc,
- const SparsityPattern &non_loc) const;
-
+ std::vector<bool> build_non_local_interfaces(
+ const ExecutorHandler &exec_handler,
+ std::shared_ptr<
+ const gko::experimental::distributed::Partition<label, label>>
+ partition,
+ std::vector<label> &local_rows, std::vector<label> &local_cols,
+ std::vector<label> &local_mapping, std::vector<label> &local_ranks,
+ std::vector<gko::span> &local_spans, std::vector<label> &non_local_rows,
+ std::vector<label> &non_local_cols,
+ std::vector<label> &non_local_mapping,
+ std::vector<label> &non_local_ranks,
+ std::vector<label> &non_local_rank_origin,
+ std::vector<gko::span> &non_local_spans) const;
 
  /* @brief function to repartition a sparsity pattern
  *
@@ -165,7 +225,7 @@ public:
  * @param src_comm_pattern the original communication pattern of the current
  * rank belonging to the distributed matrix
  * @param partition the original partition belonging to the distributed
- * matrix returns the repartitioned communcation pattern
+ * matrix returns the repartitioned communication pattern
  */
  std::shared_ptr<const CommunicationPattern> repartition_comm_pattern(
  const ExecutorHandler &exec_handler,

diff --git a/include/OGL/common.H b/include/OGL/common.H
@@ -90,10 +90,10 @@ namespace Foam {
 
 #define UNUSED(x) (void)(x)
 
-#define ASSERT_EQ(_val1, _val2)  \
- if (_val1 != _val2) {  \
- throw ::gko::ValueMismatch(__FILE__, __LINE__, __func__, _val1, _val2, \
-  "expected equal values"); \
+#define ASSERT_EQ(_val1, _val2) \
+ if ((_val1) != (_val2)) { /* parenthesised: args may be expressions */ \
+ FatalErrorInFunction << "Expected equal values: " << (_val1) << " and " \
+ << (_val2) << abort(FatalError); \
  }
 
 std::ostream &operator<<(
@@ -131,4 +131,9 @@ void set_next_caching(word sys_matrix_name, const objectRegistry &db,
  label caching);
 
 label get_next_caching(word sys_matrix_name, const objectRegistry &db);
+
+std::shared_ptr<gko::array<label>> convert_to_array(
+ const std::vector<label> &in);
+
+std::vector<label> convert_to_vector(const gko::array<label> &in);
 } // namespace Foam
diff --git a/src/MatrixWrapper/SparsityPattern.C b/src/MatrixWrapper/SparsityPattern.C
@@ -6,13 +6,19 @@
 
 namespace Foam {
 
+gko::dim<2> compute_dimensions(const std::vector<label> &rows)
+{
+ // rows is expected to be ordered, so back() is the largest row index;
+ // an empty vector maps to 0x0 because back() on it is undefined behaviour
+ const gko::size_type num_rows = rows.empty() ? 0 : rows.back() + 1;
+ return gko::dim<2>{num_rows, num_rows};
+}
+
 void make_ldu_mapping_consecutive(const AllToAllPattern &comm_pattern,
- gko::array<label> &ldu_mapping, label rank,
+ std::vector<label> &ldu_mapping, label rank,
  label ranks_per_gpu)
 {
- // TODO check if ldu_mapping is on host exec
  label ldu_offset = 0;
- auto *data = ldu_mapping.get_data();
+ auto *data = ldu_mapping.data();
 
  for (label i = 0; i < ranks_per_gpu; i++) {
  auto size = comm_pattern.recv_counts[i];