From e27ec57c7280a80cf7d0e1f3a1cdf399040dd51a Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Sat, 22 Jun 2024 21:29:45 -0400 Subject: [PATCH 01/18] Make coo2csr a standalone function. --- resolve/matrix/CMakeLists.txt | 2 + resolve/matrix/MatrixHandler.cpp | 283 ++++++++++++++++--------------- resolve/matrix/utilities.cpp | 163 ++++++++++++++++++ resolve/matrix/utilities.hpp | 17 ++ 4 files changed, 325 insertions(+), 140 deletions(-) create mode 100644 resolve/matrix/utilities.cpp create mode 100644 resolve/matrix/utilities.hpp diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index ad38dd5d..d6159742 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -15,6 +15,7 @@ set(Matrix_SRC Coo.cpp MatrixHandler.cpp MatrixHandlerCpu.cpp + utilities.h ) # C++ code that depends on CUDA SDK libraries @@ -35,6 +36,7 @@ set(Matrix_HEADER_INSTALL Csr.hpp Csc.hpp MatrixHandler.hpp + utilities.cpp ) # Build shared library ReSolve::matrix diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 3fc9a492..2a6b46fb 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -116,148 +116,151 @@ namespace ReSolve { */ int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) { - //count nnzs first - index_type nnz_unpacked = 0; - index_type nnz = A_coo->getNnz(); - index_type n = A_coo->getNumRows(); - bool symmetric = A_coo->symmetric(); - bool expanded = A_coo->expanded(); - - index_type* nnz_counts = new index_type[n]; - std::fill_n(nnz_counts, n, 0); - index_type* coo_rows = A_coo->getRowData(memory::HOST); - index_type* coo_cols = A_coo->getColData(memory::HOST); - real_type* coo_vals = A_coo->getValues( memory::HOST); - - index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal - std::fill_n(diag_control, n, 0); - index_type nnz_unpacked_no_duplicates = 0; - index_type nnz_no_duplicates = nnz; - - - //maybe check if they exist? - for (index_type i = 0; i < nnz; ++i) - { - nnz_counts[coo_rows[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) - { - nnz_counts[coo_cols[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - } - if (coo_rows[i] == coo_cols[i]){ - if (diag_control[coo_rows[i]] > 0) { - //duplicate - nnz_unpacked_no_duplicates--; - nnz_no_duplicates--; - } - diag_control[coo_rows[i]]++; - } - } - A_csr->setExpanded(true); - A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); - index_type* csr_ia = new index_type[n+1]; - std::fill_n(csr_ia, n + 1, 0); - index_type* csr_ja = new index_type[nnz_unpacked]; - real_type* csr_a = new real_type[nnz_unpacked]; - index_type* nnz_shifts = new index_type[n]; - std::fill_n(nnz_shifts, n , 0); - - IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - - csr_ia[0] = 0; - - for (index_type i = 1; i < n + 1; ++i){ - csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); - } - - int r, start; - - - for (index_type i = 0; i < nnz; ++i){ - //which row - r = coo_rows[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) { - out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - } - if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates - bool already_there = false; - for (index_type j = start; j < start + nnz_shifts[r]; ++j) - { - index_type c = tmp[j].getIdx(); - if (c == r) { - real_type val = tmp[j].getValue(); - val += coo_vals[i]; - tmp[j].setValue(val); - already_there = true; - out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; - } - } - if (!already_there){ // first time this duplicates appears - - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - - nnz_shifts[r]++; - } - } else {//not diagonal - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - - if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) - { - r = coo_cols[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) - out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - } - } - } - //now sort whatever is inside rows - - for (int i = 0; i < n; ++i) - { - - //now sorting (and adding 1) - int colStart = csr_ia[i]; - int colEnd = csr_ia[i + 1]; - int length = colEnd - colStart; - std::sort(&tmp[colStart],&tmp[colStart] + length); - } - - for (index_type i = 0; i < nnz_unpacked; ++i) - { - csr_ja[i] = tmp[i].getIdx(); - csr_a[i] = tmp[i].getValue(); - } -#if 0 - for (int i = 0; isetNnz(nnz_no_duplicates); + matrix::coo2csr(A_coo, A_csr); A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); - delete [] nnz_counts; - delete [] tmp; - delete [] nnz_shifts; - delete [] csr_ia; - delete [] csr_ja; - delete [] csr_a; - delete [] diag_control; +// //count nnzs first +// index_type nnz_unpacked = 0; +// index_type nnz = A_coo->getNnz(); +// index_type n = A_coo->getNumRows(); +// bool symmetric = A_coo->symmetric(); +// bool expanded = A_coo->expanded(); + +// index_type* nnz_counts = new index_type[n]; +// std::fill_n(nnz_counts, n, 0); +// index_type* coo_rows = A_coo->getRowData(memory::HOST); +// index_type* coo_cols = A_coo->getColData(memory::HOST); +// real_type* coo_vals = A_coo->getValues( memory::HOST); + +// index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal +// std::fill_n(diag_control, n, 0); +// index_type nnz_unpacked_no_duplicates = 0; +// index_type nnz_no_duplicates = nnz; + + +// //maybe check if they exist? +// for (index_type i = 0; i < nnz; ++i) +// { +// nnz_counts[coo_rows[i]]++; +// nnz_unpacked++; +// nnz_unpacked_no_duplicates++; +// if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) +// { +// nnz_counts[coo_cols[i]]++; +// nnz_unpacked++; +// nnz_unpacked_no_duplicates++; +// } +// if (coo_rows[i] == coo_cols[i]){ +// if (diag_control[coo_rows[i]] > 0) { +// //duplicate +// nnz_unpacked_no_duplicates--; +// nnz_no_duplicates--; +// } +// diag_control[coo_rows[i]]++; +// } +// } +// A_csr->setExpanded(true); +// A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); +// index_type* csr_ia = new index_type[n+1]; +// std::fill_n(csr_ia, n + 1, 0); +// index_type* csr_ja = new index_type[nnz_unpacked]; +// real_type* csr_a = new real_type[nnz_unpacked]; +// index_type* nnz_shifts = new index_type[n]; +// std::fill_n(nnz_shifts, n , 0); + +// IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; + +// csr_ia[0] = 0; + +// for (index_type i = 1; i < n + 1; ++i){ +// csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); +// } + +// int r, start; + + +// for (index_type i = 0; i < nnz; ++i){ +// //which row +// r = coo_rows[i]; +// start = csr_ia[r]; + +// if ((start + nnz_shifts[r]) > nnz_unpacked) { +// out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; +// } +// if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates +// bool already_there = false; +// for (index_type j = start; j < start + nnz_shifts[r]; ++j) +// { +// index_type c = tmp[j].getIdx(); +// if (c == r) { +// real_type val = tmp[j].getValue(); +// val += coo_vals[i]; +// tmp[j].setValue(val); +// already_there = true; +// out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; +// } +// } +// if (!already_there){ // first time this duplicates appears + +// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + +// nnz_shifts[r]++; +// } +// } else {//not diagonal +// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); +// nnz_shifts[r]++; + +// if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) +// { +// r = coo_cols[i]; +// start = csr_ia[r]; + +// if ((start + nnz_shifts[r]) > nnz_unpacked) +// out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; +// tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); +// nnz_shifts[r]++; +// } +// } +// } +// //now sort whatever is inside rows + +// for (int i = 0; i < n; ++i) +// { + +// //now sorting (and adding 1) +// int colStart = csr_ia[i]; +// int colEnd = csr_ia[i + 1]; +// int length = colEnd - colStart; +// std::sort(&tmp[colStart],&tmp[colStart] + length); +// } + +// for (index_type i = 0; i < nnz_unpacked; ++i) +// { +// csr_ja[i] = tmp[i].getIdx(); +// csr_a[i] = tmp[i].getValue(); +// } +// #if 0 +// for (int i = 0; isetNnz(nnz_no_duplicates); + // A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); + + // delete [] nnz_counts; + // delete [] tmp; + // delete [] nnz_shifts; + // delete [] csr_ia; + // delete [] csr_ja; + // delete [] csr_a; + // delete [] diag_control; return 0; } diff --git a/resolve/matrix/utilities.cpp b/resolve/matrix/utilities.cpp new file mode 100644 index 00000000..d23850bf --- /dev/null +++ b/resolve/matrix/utilities.cpp @@ -0,0 +1,163 @@ +#include +#include +#include +#include "utilities.hpp" + +namespace ReSolve +{ + namespace matrix + { + /** + * @brief Creates a CSR from a COO matrix. + * + * @param[in] A_coo + * @param[out] A_csr + * @return int - Error code, 0 if successful. + * + * @pre `A_coo` is a valid sparse matrix in unorderes COO format. + * Duplicates are allowed. Up-to-date values and indices must be + * on host. + * + * @post `A_csr` is representing the same matrix as `A_coo` but in CSR + * format. `A_csr` is allocated and stored on host. + * + * @invariant `A_coo` is not changed. + */ + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr) + { + //count nnzs first + index_type nnz_unpacked = 0; + index_type nnz = A_coo->getNnz(); + index_type n = A_coo->getNumRows(); + bool symmetric = A_coo->symmetric(); + bool expanded = A_coo->expanded(); + + index_type* nnz_counts = new index_type[n]; + std::fill_n(nnz_counts, n, 0); + index_type* coo_rows = A_coo->getRowData(memory::HOST); + index_type* coo_cols = A_coo->getColData(memory::HOST); + real_type* coo_vals = A_coo->getValues( memory::HOST); + + index_type* diag_control = new index_type[n]; // for DEDUPLICATION of the diagonal + std::fill_n(diag_control, n, 0); + index_type nnz_unpacked_no_duplicates = 0; + index_type nnz_no_duplicates = nnz; + + // maybe check if they exist? + for (index_type i = 0; i < nnz; ++i) { + nnz_counts[coo_rows[i]]++; + nnz_unpacked++; + nnz_unpacked_no_duplicates++; + if ((coo_rows[i] != coo_cols[i]) && (symmetric) && (!expanded)) + { + nnz_counts[coo_cols[i]]++; + nnz_unpacked++; + nnz_unpacked_no_duplicates++; + } + if (coo_rows[i] == coo_cols[i]) { + if (diag_control[coo_rows[i]] > 0) { + // duplicate + nnz_unpacked_no_duplicates--; + nnz_no_duplicates--; + } + diag_control[coo_rows[i]]++; + } + } + A_csr->setExpanded(true); + A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); + index_type* csr_ia = new index_type[n+1]; + std::fill_n(csr_ia, n + 1, 0); + index_type* csr_ja = new index_type[nnz_unpacked]; + real_type* csr_a = new real_type[nnz_unpacked]; + index_type* nnz_shifts = new index_type[n]; + std::fill_n(nnz_shifts, n , 0); + + IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; + + csr_ia[0] = 0; + + for (index_type i = 1; i < n + 1; ++i) { + csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); + } + + int r, start; + + + for (index_type i = 0; i < nnz; ++i){ + //which row + r = coo_rows[i]; + start = csr_ia[r]; + + if ((start + nnz_shifts[r]) > nnz_unpacked) { + out::warning() << "index out of bounds (case 1) start: " << start + << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; + } + if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates + bool already_there = false; + for (index_type j = start; j < start + nnz_shifts[r]; ++j) + { + index_type c = tmp[j].getIdx(); + if (c == r) { + real_type val = tmp[j].getValue(); + val += coo_vals[i]; + tmp[j].setValue(val); + already_there = true; + out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; + } + } + if (!already_there) { // first time this duplicates appears + + tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); + tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + + nnz_shifts[r]++; + } + } else { // non-diagonal + tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); + tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + nnz_shifts[r]++; + + if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) + { + r = coo_cols[i]; + start = csr_ia[r]; + + if ((start + nnz_shifts[r]) > nnz_unpacked) + out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; + tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); + tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + nnz_shifts[r]++; + } + } + } + + //now sort whatever is inside rows + for (int i = 0; i < n; ++i) { + //now sorting (and adding 1) + int colStart = csr_ia[i]; + int colEnd = csr_ia[i + 1]; + int length = colEnd - colStart; + std::sort(&tmp[colStart], &tmp[colStart] + length); + } + + for (index_type i = 0; i < nnz_unpacked; ++i) + { + csr_ja[i] = tmp[i].getIdx(); + csr_a[i] = tmp[i].getValue(); + } + A_csr->setNnz(nnz_no_duplicates); + A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); + + delete [] nnz_counts; + delete [] tmp; + delete [] nnz_shifts; + delete [] csr_ia; + delete [] csr_ja; + delete [] csr_a; + delete [] diag_control; + + return 0; + + } + } +} \ No newline at end of file diff --git a/resolve/matrix/utilities.hpp b/resolve/matrix/utilities.hpp new file mode 100644 index 00000000..20582eaa --- /dev/null +++ b/resolve/matrix/utilities.hpp @@ -0,0 +1,17 @@ +#pragma once + +namespace ReSolve +{ + namespace matrix + { + // Forward declarations + class Coo; + class Csr; + + /// @brief + /// @param A_coo + /// @param A_csr + /// @return + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr); + } +} From d49f5611fd0bfe3fd1ee58354726a3485243f31f Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 24 Jun 2024 15:43:44 -0400 Subject: [PATCH 02/18] Remove duplicated coo2csr code. --- resolve/matrix/CMakeLists.txt | 4 +- resolve/matrix/Csr.cpp | 144 +---------------------------- resolve/matrix/Csr.hpp | 2 - resolve/matrix/MatrixHandler.cpp | 150 +------------------------------ resolve/matrix/utilities.cpp | 53 ++++++----- resolve/matrix/utilities.hpp | 9 +- 6 files changed, 41 insertions(+), 321 deletions(-) diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index d6159742..ba2935ab 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -15,7 +15,7 @@ set(Matrix_SRC Coo.cpp MatrixHandler.cpp MatrixHandlerCpu.cpp - utilities.h + utilities.cpp ) # C++ code that depends on CUDA SDK libraries @@ -36,7 +36,7 @@ set(Matrix_HEADER_INSTALL Csr.hpp Csc.hpp MatrixHandler.hpp - utilities.cpp + utilities.hpp ) # Build shared library ReSolve::matrix diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 325bb76b..308f1a29 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -7,6 +7,7 @@ #include "Coo.hpp" #include #include +#include namespace ReSolve { @@ -35,7 +36,7 @@ namespace ReSolve A_coo->symmetric(), A_coo->expanded()) { - coo2csr(A_coo, memspace); + matrix::coo2csr(A_coo, this, memspace); } /** @@ -374,149 +375,10 @@ namespace ReSolve assert(nnz_ == A_coo->getNnz()); assert(is_symmetric_ == A_coo->symmetric()); // <- Do we need to check for this? - return coo2csr(A_coo, memspaceOut); + return matrix::coo2csr(A_coo, this, memspaceOut); } - int matrix::Csr::coo2csr(matrix::Coo* A_coo, memory::MemorySpace memspace) - { - //count nnzs first - index_type nnz_unpacked = 0; - index_type nnz = A_coo->getNnz(); - index_type n = A_coo->getNumRows(); - bool symmetric = A_coo->symmetric(); - bool expanded = A_coo->expanded(); - - index_type* nnz_counts = new index_type[n]; - std::fill_n(nnz_counts, n, 0); - index_type* coo_rows = A_coo->getRowData(memory::HOST); - index_type* coo_cols = A_coo->getColData(memory::HOST); - real_type* coo_vals = A_coo->getValues( memory::HOST); - - index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal - std::fill_n(diag_control, n, 0); - index_type nnz_unpacked_no_duplicates = 0; - index_type nnz_no_duplicates = nnz; - - - //maybe check if they exist? - for (index_type i = 0; i < nnz; ++i) - { - nnz_counts[coo_rows[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) - { - nnz_counts[coo_cols[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - } - if (coo_rows[i] == coo_cols[i]){ - if (diag_control[coo_rows[i]] > 0) { - //duplicate - nnz_unpacked_no_duplicates--; - nnz_no_duplicates--; - } - diag_control[coo_rows[i]]++; - } - } - this->setExpanded(true); - this->setNnzExpanded(nnz_unpacked_no_duplicates); - index_type* csr_ia = new index_type[n+1]; - std::fill_n(csr_ia, n + 1, 0); - index_type* csr_ja = new index_type[nnz_unpacked]; - real_type* csr_a = new real_type[nnz_unpacked]; - index_type* nnz_shifts = new index_type[n]; - std::fill_n(nnz_shifts, n , 0); - - IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - - csr_ia[0] = 0; - - for (index_type i = 1; i < n + 1; ++i){ - csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); - } - - int r, start; - - - for (index_type i = 0; i < nnz; ++i){ - //which row - r = coo_rows[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) { - out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - } - if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates - bool already_there = false; - for (index_type j = start; j < start + nnz_shifts[r]; ++j) - { - index_type c = tmp[j].getIdx(); - if (c == r) { - real_type val = tmp[j].getValue(); - val += coo_vals[i]; - tmp[j].setValue(val); - already_there = true; - out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; - } - } - if (!already_there){ // first time this duplicates appears - - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - - nnz_shifts[r]++; - } - } else {//not diagonal - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - - if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) - { - r = coo_cols[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) - out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - } - } - } - //now sort whatever is inside rows - - for (int i = 0; i < n; ++i) - { - //now sorting (and adding 1) - int colStart = csr_ia[i]; - int colEnd = csr_ia[i + 1]; - int length = colEnd - colStart; - std::sort(&tmp[colStart],&tmp[colStart] + length); - } - - for (index_type i = 0; i < nnz_unpacked; ++i) - { - csr_ja[i] = tmp[i].getIdx(); - csr_a[i] = tmp[i].getValue(); - } - - this->setNnz(nnz_no_duplicates); - this->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); - - delete [] nnz_counts; - delete [] tmp; - delete [] nnz_shifts; - delete [] csr_ia; - delete [] csr_ja; - delete [] csr_a; - delete [] diag_control; - - return 0; - } - /** * @brief Prints matrix data. * diff --git a/resolve/matrix/Csr.hpp b/resolve/matrix/Csr.hpp index 7841a7fb..a3896a1d 100644 --- a/resolve/matrix/Csr.hpp +++ b/resolve/matrix/Csr.hpp @@ -49,8 +49,6 @@ namespace ReSolve { namespace matrix { int updateFromCoo(matrix::Coo* mat, memory::MemorySpace memspaceOut); - private: - int coo2csr(matrix::Coo* mat, memory::MemorySpace memspace); }; }} // namespace ReSolve::matrix diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 2a6b46fb..127f03b2 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include "MatrixHandler.hpp" #include "MatrixHandlerCpu.hpp" @@ -116,153 +116,7 @@ namespace ReSolve { */ int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) { - matrix::coo2csr(A_coo, A_csr); - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); - -// //count nnzs first -// index_type nnz_unpacked = 0; -// index_type nnz = A_coo->getNnz(); -// index_type n = A_coo->getNumRows(); -// bool symmetric = A_coo->symmetric(); -// bool expanded = A_coo->expanded(); - -// index_type* nnz_counts = new index_type[n]; -// std::fill_n(nnz_counts, n, 0); -// index_type* coo_rows = A_coo->getRowData(memory::HOST); -// index_type* coo_cols = A_coo->getColData(memory::HOST); -// real_type* coo_vals = A_coo->getValues( memory::HOST); - -// index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal -// std::fill_n(diag_control, n, 0); -// index_type nnz_unpacked_no_duplicates = 0; -// index_type nnz_no_duplicates = nnz; - - -// //maybe check if they exist? -// for (index_type i = 0; i < nnz; ++i) -// { -// nnz_counts[coo_rows[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) -// { -// nnz_counts[coo_cols[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// } -// if (coo_rows[i] == coo_cols[i]){ -// if (diag_control[coo_rows[i]] > 0) { -// //duplicate -// nnz_unpacked_no_duplicates--; -// nnz_no_duplicates--; -// } -// diag_control[coo_rows[i]]++; -// } -// } -// A_csr->setExpanded(true); -// A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); -// index_type* csr_ia = new index_type[n+1]; -// std::fill_n(csr_ia, n + 1, 0); -// index_type* csr_ja = new index_type[nnz_unpacked]; -// real_type* csr_a = new real_type[nnz_unpacked]; -// index_type* nnz_shifts = new index_type[n]; -// std::fill_n(nnz_shifts, n , 0); - -// IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - -// csr_ia[0] = 0; - -// for (index_type i = 1; i < n + 1; ++i){ -// csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); -// } - -// int r, start; - - -// for (index_type i = 0; i < nnz; ++i){ -// //which row -// r = coo_rows[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) { -// out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// } -// if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates -// bool already_there = false; -// for (index_type j = start; j < start + nnz_shifts[r]; ++j) -// { -// index_type c = tmp[j].getIdx(); -// if (c == r) { -// real_type val = tmp[j].getValue(); -// val += coo_vals[i]; -// tmp[j].setValue(val); -// already_there = true; -// out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; -// } -// } -// if (!already_there){ // first time this duplicates appears - -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - -// nnz_shifts[r]++; -// } -// } else {//not diagonal -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; - -// if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) -// { -// r = coo_cols[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) -// out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; -// } -// } -// } -// //now sort whatever is inside rows - -// for (int i = 0; i < n; ++i) -// { - -// //now sorting (and adding 1) -// int colStart = csr_ia[i]; -// int colEnd = csr_ia[i + 1]; -// int length = colEnd - colStart; -// std::sort(&tmp[colStart],&tmp[colStart] + length); -// } - -// for (index_type i = 0; i < nnz_unpacked; ++i) -// { -// csr_ja[i] = tmp[i].getIdx(); -// csr_a[i] = tmp[i].getValue(); -// } -// #if 0 -// for (int i = 0; isetNnz(nnz_no_duplicates); - // A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); - - // delete [] nnz_counts; - // delete [] tmp; - // delete [] nnz_shifts; - // delete [] csr_ia; - // delete [] csr_ja; - // delete [] csr_a; - // delete [] diag_control; - - return 0; + return matrix::coo2csr(A_coo, A_csr, memspace); } /** diff --git a/resolve/matrix/utilities.cpp b/resolve/matrix/utilities.cpp index d23850bf..a7e1a3e8 100644 --- a/resolve/matrix/utilities.cpp +++ b/resolve/matrix/utilities.cpp @@ -1,10 +1,13 @@ #include #include #include +#include +#include #include "utilities.hpp" namespace ReSolve { + using out = io::Logger; namespace matrix { /** @@ -14,46 +17,47 @@ namespace ReSolve * @param[out] A_csr * @return int - Error code, 0 if successful. * - * @pre `A_coo` is a valid sparse matrix in unorderes COO format. + * @pre `A_coo` is a valid sparse matrix in unordered COO format. * Duplicates are allowed. Up-to-date values and indices must be * on host. * - * @post `A_csr` is representing the same matrix as `A_coo` but in CSR - * format. `A_csr` is allocated and stored on host. + * @post `A_csr` is representing the same matrix as `A_coo` but in + * _general_ CSR format. `A_csr` is allocated and stored on host. * * @invariant `A_coo` is not changed. */ - int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr) + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) { //count nnzs first index_type nnz_unpacked = 0; - index_type nnz = A_coo->getNnz(); - index_type n = A_coo->getNumRows(); - bool symmetric = A_coo->symmetric(); - bool expanded = A_coo->expanded(); + const index_type nnz = A_coo->getNnz(); + const index_type n = A_coo->getNumRows(); + const bool symmetric = A_coo->symmetric(); + const bool expanded = A_coo->expanded(); - index_type* nnz_counts = new index_type[n]; + index_type* nnz_counts = new index_type[n]; ///< Number of elements per row std::fill_n(nnz_counts, n, 0); - index_type* coo_rows = A_coo->getRowData(memory::HOST); - index_type* coo_cols = A_coo->getColData(memory::HOST); - real_type* coo_vals = A_coo->getValues( memory::HOST); + const index_type* coo_rows = A_coo->getRowData(memory::HOST); + const index_type* coo_cols = A_coo->getColData(memory::HOST); + const real_type* coo_vals = A_coo->getValues( memory::HOST); index_type* diag_control = new index_type[n]; // for DEDUPLICATION of the diagonal std::fill_n(diag_control, n, 0); index_type nnz_unpacked_no_duplicates = 0; index_type nnz_no_duplicates = nnz; - // maybe check if they exist? + // Count matrix elements for (index_type i = 0; i < nnz; ++i) { nnz_counts[coo_rows[i]]++; nnz_unpacked++; nnz_unpacked_no_duplicates++; - if ((coo_rows[i] != coo_cols[i]) && (symmetric) && (!expanded)) - { + // Count elements added after expansion + if ((coo_rows[i] != coo_cols[i]) && (symmetric) && (!expanded)) { nnz_counts[coo_cols[i]]++; nnz_unpacked++; nnz_unpacked_no_duplicates++; } + // Bookkeeping of diagonal elements that were counted if (coo_rows[i] == coo_cols[i]) { if (diag_control[coo_rows[i]] > 0) { // duplicate @@ -65,6 +69,8 @@ namespace ReSolve } A_csr->setExpanded(true); A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); + + // Allocate matrix format conversion workspace index_type* csr_ia = new index_type[n+1]; std::fill_n(csr_ia, n + 1, 0); index_type* csr_ja = new index_type[nnz_unpacked]; @@ -74,16 +80,15 @@ namespace ReSolve IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; + // Set CSR row pointers csr_ia[0] = 0; - for (index_type i = 1; i < n + 1; ++i) { csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); } int r, start; - - for (index_type i = 0; i < nnz; ++i){ + for (index_type i = 0; i < nnz; ++i) { //which row r = coo_rows[i]; start = csr_ia[r]; @@ -102,7 +107,8 @@ namespace ReSolve val += coo_vals[i]; tmp[j].setValue(val); already_there = true; - out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; + out::warning() << " duplicate found, row " << c + << " adding in place " << j << " current value: " << val << std::endl; } } if (!already_there) { // first time this duplicates appears @@ -117,13 +123,14 @@ namespace ReSolve tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); nnz_shifts[r]++; - if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) - { + if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) { r = coo_cols[i]; start = csr_ia[r]; - if ((start + nnz_shifts[r]) > nnz_unpacked) - out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; + if ((start + nnz_shifts[r]) > nnz_unpacked) { + out::warning() << "index out of bounds (case 2) start: " << start + << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; + } tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); nnz_shifts[r]++; diff --git a/resolve/matrix/utilities.hpp b/resolve/matrix/utilities.hpp index 20582eaa..ecaee79e 100644 --- a/resolve/matrix/utilities.hpp +++ b/resolve/matrix/utilities.hpp @@ -1,5 +1,7 @@ #pragma once +#include + namespace ReSolve { namespace matrix @@ -8,10 +10,7 @@ namespace ReSolve class Coo; class Csr; - /// @brief - /// @param A_coo - /// @param A_csr - /// @return - int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr); + /// @brief Converts symmetric or general COO to general CSR matrix + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace); } } From 52fa85db2b49e2b3d09691591a62b6786099b74d Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 25 Jun 2024 09:08:29 -0400 Subject: [PATCH 03/18] Fix missing header for sort function. --- resolve/matrix/utilities.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/resolve/matrix/utilities.cpp b/resolve/matrix/utilities.cpp index a7e1a3e8..7ad866f5 100644 --- a/resolve/matrix/utilities.cpp +++ b/resolve/matrix/utilities.cpp @@ -1,3 +1,4 @@ +#include #include #include #include From 4af64c54d68738911b6d48531e583ce2dc17e4f6 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Fri, 5 Jul 2024 14:12:41 -0400 Subject: [PATCH 04/18] clean-slate coo2csr implementation + tests --- resolve/matrix/CMakeLists.txt | 4 +- resolve/matrix/Csr.cpp | 2 +- resolve/matrix/MatrixHandler.cpp | 2 +- resolve/matrix/Utilities.cpp | 225 ++++++++++++++++++ .../matrix/{utilities.hpp => Utilities.hpp} | 0 resolve/matrix/utilities.cpp | 171 ------------- tests/unit/matrix/CMakeLists.txt | 7 +- tests/unit/matrix/MatrixConversionTests.hpp | 213 +++++++++++++++++ .../unit/matrix/runMatrixConversionTests.cpp | 18 ++ 9 files changed, 466 insertions(+), 176 deletions(-) create mode 100644 resolve/matrix/Utilities.cpp rename resolve/matrix/{utilities.hpp => Utilities.hpp} (100%) delete mode 100644 resolve/matrix/utilities.cpp create mode 100644 tests/unit/matrix/MatrixConversionTests.hpp create mode 100644 tests/unit/matrix/runMatrixConversionTests.cpp diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index ba2935ab..6a92975e 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -15,7 +15,7 @@ set(Matrix_SRC Coo.cpp MatrixHandler.cpp MatrixHandlerCpu.cpp - utilities.cpp + Utilities.cpp ) # C++ code that depends on CUDA SDK libraries @@ -36,7 +36,7 @@ set(Matrix_HEADER_INSTALL Csr.hpp Csc.hpp MatrixHandler.hpp - utilities.hpp + Utilities.hpp ) # Build shared library ReSolve::matrix diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 308f1a29..4735e68a 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -7,7 +7,7 @@ #include "Coo.hpp" #include #include -#include +#include namespace ReSolve { diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 127f03b2..b61a93b5 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include "MatrixHandler.hpp" #include "MatrixHandlerCpu.hpp" diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp new file mode 100644 index 00000000..50e9852c --- /dev/null +++ b/resolve/matrix/Utilities.cpp @@ -0,0 +1,225 @@ +#include +#include +#include + +#include "Utilities.hpp" + +#include +#include +#include +#include +#include + +namespace ReSolve +{ + using out = io::Logger; + namespace matrix + { + /** + * @brief Creates a CSR from a COO matrix. + * + * @param[in] A + * @param[out] B + * @return int - Error code, 0 if successful. + * + * @pre `A` is a valid sparse matrix in unordered COO format. Duplicates are allowed. + * Up-to-date values and indices must be on the host. + * + * @post `B` represents the same matrix as `A` but is in the CSR format. `B` is + * allocated and stored on the host. + * + * @invariant `A` is not changed. + */ + int coo2csr(matrix::Coo* A, matrix::Csr* B, memory::MemorySpace memspace) + { + // NOTE: a note on deduplication: + // currently, this function allocates more memory than necessary to contain + // the matrix. this is because it's impossible to cleanly check the amount + // of space each row needs ahead of time without storing a full dense matrix + // with the dimensions of A + // + // additionally, this necessitates that we shrink the rows to remove this + // unused space, which is costly and necessitates a lot of shifting + // + // one potential solution to this that i thought of was to store an array of + // bloom filters (maybe u64s or u128s) of the size of the number of columns, + // each filter indicating if there is a value stored at that column in a row + // + // if, during the preprocessing step, we encounter a potential duplicate, we + // backtrack to see if there actually is one. given that the number of + // duplicates is probably small, this shouldn't carry too much of a + // performance penalty + // + // if performance becomes a problem, this could be coupled with arrays + // maintaining the last and/or first seen indices in the coo arrays at which + // a row had an associated value, to speed up the backtracking process + // + // this would be applied during the first allocation phase when an element + // for a row is encountered, prior to the increment of its size in + // `new_rows` + + index_type* rows = A->getRowData(memory::HOST); + index_type* columns = A->getColData(memory::HOST); + real_type* values = A->getValues(memory::HOST); + + if (rows == nullptr || columns == nullptr || values == nullptr) { + return 0; + } + + index_type nnz = A->getNnz(); + index_type n_rows = A->getNumRows(); + index_type* new_rows = new index_type[n_rows + 1]; + std::fill_n(new_rows, n_rows + 1, 0); + + // NOTE: this is the only auxiliary storage buffer used by this conversion + // function. it is first used to track the number of values on the + // diagonal (if the matrix is symmetric and unexpanded), then it is + // used to track the amount of spaces used in each row's value and + // column data + std::unique_ptr used(new index_type[n_rows]); + std::fill_n(used.get(), n_rows, 0); + + // allocation stage + + // NOTE: so aside from tracking the number of mirrored values for allocation + // purposes, we also have to compute the amount of nonzeroes in each row + // since we're dealing with a COO input matrix. we store these values in + // row + 1 slots in new_rows, to avoid allocating extra space :) + + if (!A->symmetric() || A->expanded()) { + // NOTE: this branch is special cased because there is a bunch of extra + // bookkeeping done if a matrix is symmetric and unexpanded + for (index_type i = 0; i < nnz; i++) { + new_rows[rows[i] + 1]++; + } + } else { + bool upper_triangular = false; + for (index_type i = 0; i < nnz; i++) { + new_rows[rows[i] + 1]++; + if (rows[i] != columns[i]) { + used[columns[i]]++; + + if (rows[i] > columns[i] && upper_triangular) { + assert(false && "a matrix indicated to be symmetric triangular was not actually symmetric triangular"); + return -1; + } + upper_triangular = rows[i] < columns[i]; + } + } + } + + for (index_type row = 0; row < n_rows; row++) { + new_rows[row + 1] += new_rows[row] + used[row]; + used[row] = 0; + } + + index_type* new_columns = new index_type[new_rows[n_rows]]; + // TODO: we need to find a better way to fix this. what this fixes is + // that otherwise, the column values are all zero by default and + // this seems to mess with the way the insertion position is + // selected if the column of the pair we're looking to insert + // is zero + std::fill_n(new_columns, new_rows[n_rows], -1); + real_type* new_values = new real_type[new_rows[n_rows]]; + + // fill stage + + for (index_type i = 0; i < nnz; i++) { + index_type insertion_pos = + static_cast( + std::lower_bound(&new_columns[new_rows[rows[i]]], + &new_columns[new_rows[rows[i]] + used[rows[i]]], + columns[i]) - + new_columns); + + // NOTE: the stuff beyond here is basically insertion sort. i'm not + // sure if it's the best way of going about sorting the + // column-value pairs, but it seemed to me to be the most + // natural way of going about this. the only potential + // benefit to using something else (that i can forsee) would + // be that on really large matrices with many nonzeroes in a + // row, something like mergesort might be better or maybe + // a reimpl of plain std::sort. the advantage offered by + // insertion sort here is that we can do it while we fill + // the row, as opposed to doing sorting in a postprocessing + // step as was done prior + + if (new_columns[insertion_pos] == columns[i]) { + new_values[insertion_pos] = values[i]; + } else { + for (index_type offset = new_rows[rows[i]] + used[rows[i]]++; + offset > insertion_pos; + offset--) { + std::swap(new_columns[offset], new_columns[offset - 1]); + std::swap(new_values[offset], new_values[offset - 1]); + } + + new_columns[insertion_pos] = columns[i]; + new_values[insertion_pos] = values[i]; + } + + if ((A->symmetric() && !A->expanded()) && (columns[i] != rows[i])) { + index_type mirrored_insertion_pos = + static_cast( + std::lower_bound(&new_columns[new_rows[columns[i]]], + &new_columns[new_rows[columns[i]] + used[columns[i]]], + rows[i]) - + new_columns); + + if (new_columns[mirrored_insertion_pos] == rows[i]) { + new_values[mirrored_insertion_pos] = values[i]; + } else { + for (index_type offset = new_rows[columns[i]] + used[columns[i]]++; + offset > mirrored_insertion_pos; + offset--) { + std::swap(new_columns[offset], new_columns[offset - 1]); + std::swap(new_values[offset], new_values[offset - 1]); + } + + new_columns[mirrored_insertion_pos] = rows[i]; + new_values[mirrored_insertion_pos] = values[i]; + } + } + } + + // backshifting stage + + for (index_type row = 0; row < n_rows - 1; row++) { + index_type row_nnz = new_rows[row + 1] - new_rows[row]; + if (used[row] != row_nnz) { + index_type correction = row_nnz - used[row]; + + for (index_type corrected_row = row + 1; + corrected_row < n_rows; + corrected_row++) { + for (index_type offset = new_rows[corrected_row]; + offset < new_rows[corrected_row + 1]; + offset++) { + new_columns[offset - correction] = new_columns[offset]; + new_values[offset - correction] = new_values[offset]; + } + + new_rows[corrected_row] -= correction; + } + + new_rows[n_rows] -= correction; + } + } + + if (B->destroyMatrixData(memory::HOST) != 0 || + B->setMatrixData(new_rows, new_columns, new_values, memory::HOST) != 0) { + assert(false && "invalid state after coo -> csr conversion"); + return -1; + } + + // TODO: set data ownership / value ownership here. we'd be leaking + // memory here otherwise + + B->setSymmetric(A->symmetric()); + B->setNnz(new_rows[n_rows]); + B->setExpanded(true); + + return 0; + } + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/utilities.hpp b/resolve/matrix/Utilities.hpp similarity index 100% rename from resolve/matrix/utilities.hpp rename to resolve/matrix/Utilities.hpp diff --git a/resolve/matrix/utilities.cpp b/resolve/matrix/utilities.cpp deleted file mode 100644 index 7ad866f5..00000000 --- a/resolve/matrix/utilities.cpp +++ /dev/null @@ -1,171 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "utilities.hpp" - -namespace ReSolve -{ - using out = io::Logger; - namespace matrix - { - /** - * @brief Creates a CSR from a COO matrix. - * - * @param[in] A_coo - * @param[out] A_csr - * @return int - Error code, 0 if successful. - * - * @pre `A_coo` is a valid sparse matrix in unordered COO format. - * Duplicates are allowed. Up-to-date values and indices must be - * on host. - * - * @post `A_csr` is representing the same matrix as `A_coo` but in - * _general_ CSR format. `A_csr` is allocated and stored on host. - * - * @invariant `A_coo` is not changed. - */ - int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) - { - //count nnzs first - index_type nnz_unpacked = 0; - const index_type nnz = A_coo->getNnz(); - const index_type n = A_coo->getNumRows(); - const bool symmetric = A_coo->symmetric(); - const bool expanded = A_coo->expanded(); - - index_type* nnz_counts = new index_type[n]; ///< Number of elements per row - std::fill_n(nnz_counts, n, 0); - const index_type* coo_rows = A_coo->getRowData(memory::HOST); - const index_type* coo_cols = A_coo->getColData(memory::HOST); - const real_type* coo_vals = A_coo->getValues( memory::HOST); - - index_type* diag_control = new index_type[n]; // for DEDUPLICATION of the diagonal - std::fill_n(diag_control, n, 0); - index_type nnz_unpacked_no_duplicates = 0; - index_type nnz_no_duplicates = nnz; - - // Count matrix elements - for (index_type i = 0; i < nnz; ++i) { - nnz_counts[coo_rows[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - // Count elements added after expansion - if ((coo_rows[i] != coo_cols[i]) && (symmetric) && (!expanded)) { - nnz_counts[coo_cols[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - } - // Bookkeeping of diagonal elements that were counted - if (coo_rows[i] == coo_cols[i]) { - if (diag_control[coo_rows[i]] > 0) { - // duplicate - nnz_unpacked_no_duplicates--; - nnz_no_duplicates--; - } - diag_control[coo_rows[i]]++; - } - } - A_csr->setExpanded(true); - A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); - - // Allocate matrix format conversion workspace - index_type* csr_ia = new index_type[n+1]; - std::fill_n(csr_ia, n + 1, 0); - index_type* csr_ja = new index_type[nnz_unpacked]; - real_type* csr_a = new real_type[nnz_unpacked]; - index_type* nnz_shifts = new index_type[n]; - std::fill_n(nnz_shifts, n , 0); - - IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - - // Set CSR row pointers - csr_ia[0] = 0; - for (index_type i = 1; i < n + 1; ++i) { - csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); - } - - int r, start; - - for (index_type i = 0; i < nnz; ++i) { - //which row - r = coo_rows[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) { - out::warning() << "index out of bounds (case 1) start: " << start - << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - } - if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates - bool already_there = false; - for (index_type j = start; j < start + nnz_shifts[r]; ++j) - { - index_type c = tmp[j].getIdx(); - if (c == r) { - real_type val = tmp[j].getValue(); - val += coo_vals[i]; - tmp[j].setValue(val); - already_there = true; - out::warning() << " duplicate found, row " << c - << " adding in place " << j << " current value: " << val << std::endl; - } - } - if (!already_there) { // first time this duplicates appears - - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - - nnz_shifts[r]++; - } - } else { // non-diagonal - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - - if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) { - r = coo_cols[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) { - out::warning() << "index out of bounds (case 2) start: " << start - << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - } - tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - } - } - } - - //now sort whatever is inside rows - for (int i = 0; i < n; ++i) { - //now sorting (and adding 1) - int colStart = csr_ia[i]; - int colEnd = csr_ia[i + 1]; - int length = colEnd - colStart; - std::sort(&tmp[colStart], &tmp[colStart] + length); - } - - for (index_type i = 0; i < nnz_unpacked; ++i) - { - csr_ja[i] = tmp[i].getIdx(); - csr_a[i] = tmp[i].getValue(); - } - A_csr->setNnz(nnz_no_duplicates); - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); - - delete [] nnz_counts; - delete [] tmp; - delete [] nnz_shifts; - delete [] csr_ia; - delete [] csr_ja; - delete [] csr_a; - delete [] diag_control; - - return 0; - - } - } -} \ No newline at end of file diff --git a/tests/unit/matrix/CMakeLists.txt b/tests/unit/matrix/CMakeLists.txt index 5b939056..d313248a 100644 --- a/tests/unit/matrix/CMakeLists.txt +++ b/tests/unit/matrix/CMakeLists.txt @@ -18,6 +18,10 @@ target_link_libraries(runMatrixHandlerTests.exe PRIVATE ReSolve resolve_matrix) add_executable(runMatrixFactorizationTests.exe runMatrixFactorizationTests.cpp) target_link_libraries(runMatrixFactorizationTests.exe PRIVATE ReSolve resolve_matrix) +# Build matrix conversion tests +add_executable(runMatrixConversionTests.exe runMatrixConversionTests.cpp) +target_link_libraries(runMatrixConversionTests.exe PRIVATE ReSolve resolve_matrix) + # Build LUSOL-related tests if(RESOLVE_USE_LUSOL) add_executable(runLUSOLTests.exe runLUSOLTests.cpp) @@ -25,7 +29,7 @@ if(RESOLVE_USE_LUSOL) endif() # Install tests -set(installable_tests runMatrixIoTests.exe runMatrixHandlerTests.exe runMatrixFactorizationTests.exe) +set(installable_tests runMatrixIoTests.exe runMatrixHandlerTests.exe runMatrixFactorizationTests.exe runMatrixConversionTests.exe) if(RESOLVE_USE_LUSOL) list(APPEND installable_tests runLUSOLTests.exe) endif() @@ -35,6 +39,7 @@ install(TARGETS ${installable_tests} add_test(NAME matrix_test COMMAND $) add_test(NAME matrix_handler_test COMMAND $) add_test(NAME matrix_factorization_test COMMAND $) +add_test(NAME matrix_conversion_test COMMAND $) if(RESOLVE_USE_LUSOL) add_test(NAME lusol_test COMMAND $) endif() diff --git a/tests/unit/matrix/MatrixConversionTests.hpp b/tests/unit/matrix/MatrixConversionTests.hpp new file mode 100644 index 00000000..9fe5cc50 --- /dev/null +++ b/tests/unit/matrix/MatrixConversionTests.hpp @@ -0,0 +1,213 @@ +#pragma once + +#include +#include +#include +#include + +using index_type = ReSolve::index_type; +using real_type = ReSolve::real_type; + +namespace ReSolve +{ + namespace tests + { + /** + * @class Unit tests for matrix conversions + */ + class MatrixConversionTests : TestBase + { + public: + MatrixConversionTests() + { + } + + virtual ~MatrixConversionTests() + { + } + + TestOutcome simpleUpperUnexpandedSymmetricMatrix() + { + TestStatus status; + + matrix::Coo A(simple_upper_unexpanded_symmetric_n_, + simple_upper_unexpanded_symmetric_m_, + simple_upper_unexpanded_symmetric_nnz_, + true, + false); + A.updateData(simple_upper_unexpanded_symmetric_i_, + simple_upper_unexpanded_symmetric_j_, + simple_upper_unexpanded_symmetric_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_symmetric_expected_n_, + simple_symmetric_expected_m_, + simple_symmetric_expected_nnz_, + simple_symmetric_expected_i_, + simple_symmetric_expected_j_, + simple_symmetric_expected_a_); + + return status.report(__func__); + } + + TestOutcome simpleLowerUnexpandedSymmetricMatrix() + { + TestStatus status; + + matrix::Coo A(simple_lower_unexpanded_symmetric_n_, + simple_lower_unexpanded_symmetric_m_, + simple_lower_unexpanded_symmetric_nnz_, + true, + false); + A.updateData(simple_lower_unexpanded_symmetric_i_, + simple_lower_unexpanded_symmetric_j_, + simple_lower_unexpanded_symmetric_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_symmetric_expected_n_, + simple_symmetric_expected_m_, + simple_symmetric_expected_nnz_, + simple_symmetric_expected_i_, + simple_symmetric_expected_j_, + simple_symmetric_expected_a_); + + return status.report(__func__); + } + + TestOutcome simpleMainDiagonalOnlyMatrix() + { + TestStatus status; + + matrix::Coo A(simple_main_diagonal_only_n_, + simple_main_diagonal_only_m_, + simple_main_diagonal_only_nnz_, + true, + false); + A.updateData(simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_main_diagonal_only_n_, + simple_main_diagonal_only_m_, + simple_main_diagonal_only_nnz_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_a_); + + return status.report(__func__); + } + + TestOutcome simpleFullAsymmetricMatrix() + { + TestStatus status; + + matrix::Coo A(simple_asymmetric_n_, + simple_asymmetric_m_, + simple_asymmetric_nnz_); + A.updateData(simple_asymmetric_i_, + simple_asymmetric_j_, + simple_asymmetric_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_asymmetric_expected_n_, + simple_asymmetric_expected_m_, + simple_asymmetric_expected_nnz_, + simple_asymmetric_expected_i_, + simple_asymmetric_expected_j_, + simple_asymmetric_expected_a_); + + return status.report(__func__); + } + + private: + const index_type simple_symmetric_expected_n_ = 5; + const index_type simple_symmetric_expected_m_ = 5; + const index_type simple_symmetric_expected_nnz_ = 8; + index_type simple_symmetric_expected_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + index_type simple_symmetric_expected_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + real_type simple_symmetric_expected_a_[8] = {2.0, 4.0, 6.0, 7.0, 6.0, 7.0, 8.0, 8.0}; + + const index_type simple_upper_unexpanded_symmetric_n_ = 5; + const index_type simple_upper_unexpanded_symmetric_m_ = 5; + const index_type simple_upper_unexpanded_symmetric_nnz_ = 8; + index_type simple_upper_unexpanded_symmetric_i_[8] = {0, 0, 1, 1, 1, 1, 1, 3}; + index_type simple_upper_unexpanded_symmetric_j_[8] = {0, 0, 1, 1, 2, 2, 3, 4}; + real_type simple_upper_unexpanded_symmetric_a_[8] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + + const index_type simple_main_diagonal_only_n_ = 5; + const index_type simple_main_diagonal_only_m_ = 5; + const index_type simple_main_diagonal_only_nnz_ = 3; + index_type simple_main_diagonal_only_i_j_[3] = {1, 3, 4}; + real_type simple_main_diagonal_only_a_[3] = {1.0, 2.0, 3.0}; + + const index_type simple_lower_unexpanded_symmetric_n_ = 5; + const index_type simple_lower_unexpanded_symmetric_m_ = 5; + const index_type simple_lower_unexpanded_symmetric_nnz_ = 8; + index_type simple_lower_unexpanded_symmetric_i_[8] = {0, 0, 1, 1, 2, 2, 3, 4}; + index_type simple_lower_unexpanded_symmetric_j_[8] = {0, 0, 1, 1, 1, 1, 1, 3}; + real_type simple_lower_unexpanded_symmetric_a_[8] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + + const index_type simple_asymmetric_n_ = 5; + const index_type simple_asymmetric_m_ = 5; + const index_type simple_asymmetric_nnz_ = 10; + index_type simple_asymmetric_i_[10] = {0, 1, 1, 2, 3, 3, 3, 4, 1, 1}; + index_type simple_asymmetric_j_[10] = {0, 1, 3, 1, 1, 4, 4, 3, 2, 2}; + real_type simple_asymmetric_a_[10] = {2.0, 4.0, 7.0, 9.0, 6.0, 7.0, 8.0, 8.0, 5.0, 6.0}; + + const index_type simple_asymmetric_expected_n_ = 5; + const index_type simple_asymmetric_expected_m_ = 5; + const index_type simple_asymmetric_expected_nnz_ = 8; + index_type simple_asymmetric_expected_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + index_type simple_asymmetric_expected_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + real_type simple_asymmetric_expected_a_[8] = {2.0, 4.0, 6.0, 7.0, 9.0, 6.0, 8.0, 8.0}; + + bool verifyAnswer(matrix::Csr* A, + const index_type& n, + const index_type& m, + const index_type& nnz, + index_type* is, + index_type* js, + real_type* as) + { + if (n != A->getNumRows() || m != A->getNumColumns() || nnz != A->getNnz()) { + return false; + } + + index_type* rows = A->getRowData(memory::HOST); + index_type* columns = A->getColData(memory::HOST); + real_type* values = A->getValues(memory::HOST); + + index_type answer_offset = 0; + for (index_type i = 0; i < A->getNumRows(); i++) { + for (index_type offset = rows[i]; offset < rows[i + 1]; offset++) { + if (i != is[answer_offset] || + columns[offset] != js[answer_offset] || + !isEqual(values[offset], as[answer_offset])) { + return false; + } + answer_offset++; + } + } + + return true; + } + }; + } // namespace tests +} // namespace ReSolve diff --git a/tests/unit/matrix/runMatrixConversionTests.cpp b/tests/unit/matrix/runMatrixConversionTests.cpp new file mode 100644 index 00000000..d8e9c156 --- /dev/null +++ b/tests/unit/matrix/runMatrixConversionTests.cpp @@ -0,0 +1,18 @@ +#include + +#include "MatrixConversionTests.hpp" + +int main() +{ + ReSolve::tests::TestingResults result; + ReSolve::tests::MatrixConversionTests test; + + result += test.simpleUpperUnexpandedSymmetricMatrix(); + result += test.simpleLowerUnexpandedSymmetricMatrix(); + result += test.simpleMainDiagonalOnlyMatrix(); + result += test.simpleFullAsymmetricMatrix(); + + std::cout << std::endl; + + return result.summary(); +} From ad9591c6ef6b516b9c797c4c5071f94096414adb Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Wed, 10 Jul 2024 10:32:40 -0400 Subject: [PATCH 05/18] apply requested adjustments --- resolve/matrix/Utilities.cpp | 71 ++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index 50e9852c..27b98f21 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -79,16 +79,16 @@ namespace ReSolve std::unique_ptr used(new index_type[n_rows]); std::fill_n(used.get(), n_rows, 0); - // allocation stage - - // NOTE: so aside from tracking the number of mirrored values for allocation - // purposes, we also have to compute the amount of nonzeroes in each row - // since we're dealing with a COO input matrix. we store these values in - // row + 1 slots in new_rows, to avoid allocating extra space :) + // allocation stage, the first loop is O(nnz) no matter the branch and the second + // is O(n) + // + // all this does is prepare the row index array based on the nonzeroes stored in + // the input coo matrix. if the matrix is symmetric and either upper or lower + // triangular, it additionally corrects for mirrored values using the `used` array + // and validates the triangularity. the branch is done to avoid the extra work if + // it's not necessary if (!A->symmetric() || A->expanded()) { - // NOTE: this branch is special cased because there is a bunch of extra - // bookkeeping done if a matrix is symmetric and unexpanded for (index_type i = 0; i < nnz; i++) { new_rows[rows[i] + 1]++; } @@ -114,23 +114,27 @@ namespace ReSolve } index_type* new_columns = new index_type[new_rows[n_rows]]; - // TODO: we need to find a better way to fix this. what this fixes is - // that otherwise, the column values are all zero by default and - // this seems to mess with the way the insertion position is - // selected if the column of the pair we're looking to insert - // is zero std::fill_n(new_columns, new_rows[n_rows], -1); real_type* new_values = new real_type[new_rows[n_rows]]; - // fill stage + // fill stage, approximately O(nnz * m) in the worst case + // + // all this does is iterate over the nonzeroes in the coo matrix, + // check to see if a value at that colum already exists using binary search, + // and if it does, then insert the new value at that position (deduplicating + // the matrix), otherwise, it allocates a new spot in the row (where you see + // used[rows[i]]++) and shifts everything over, performing what is effectively + // insertion sort. the lower half is conditioned on the matrix being symmetric + // and stored as either upper-triangular or lower-triangular, and just + // performs the same as what is described above, but with the indices swapped. for (index_type i = 0; i < nnz; i++) { index_type insertion_pos = static_cast( std::lower_bound(&new_columns[new_rows[rows[i]]], &new_columns[new_rows[rows[i]] + used[rows[i]]], - columns[i]) - - new_columns); + columns[i]) + - new_columns); // NOTE: the stuff beyond here is basically insertion sort. i'm not // sure if it's the best way of going about sorting the @@ -163,8 +167,8 @@ namespace ReSolve static_cast( std::lower_bound(&new_columns[new_rows[columns[i]]], &new_columns[new_rows[columns[i]] + used[columns[i]]], - rows[i]) - - new_columns); + rows[i]) + - new_columns); if (new_columns[mirrored_insertion_pos] == rows[i]) { new_values[mirrored_insertion_pos] = values[i]; @@ -182,7 +186,13 @@ namespace ReSolve } } - // backshifting stage + // backshifting stage, approximately O(n^2 * nnz) in the worst case + // (this probably isn't the tightest bound i could apply) + // + // all this does is shift back rows to remove empty space in between them + // by indexing each row in order, checking to see if the amount of used + // spaces is equivalent to the amount of nonzeroes in the row, and if not, + // shifts every subsequent row back the amount of unused spaces for (index_type row = 0; row < n_rows - 1; row++) { index_type row_nnz = new_rows[row + 1] - new_rows[row]; @@ -206,18 +216,25 @@ namespace ReSolve } } - if (B->destroyMatrixData(memory::HOST) != 0 || - B->setMatrixData(new_rows, new_columns, new_values, memory::HOST) != 0) { + B->setSymmetric(A->symmetric()); + B->setNnz(new_rows[n_rows]); + // NOTE: this is necessary because updateData always reads the current nnz from + // this field + B->setNnzExpanded(new_rows[n_rows]); + B->setExpanded(true); + + if (B->updateData(new_rows, new_columns, new_values, memory::HOST, memspace) != 0) { + delete[] new_rows; + delete[] new_columns; + delete[] new_values; + assert(false && "invalid state after coo -> csr conversion"); return -1; } - // TODO: set data ownership / value ownership here. we'd be leaking - // memory here otherwise - - B->setSymmetric(A->symmetric()); - B->setNnz(new_rows[n_rows]); - B->setExpanded(true); + delete[] new_rows; + delete[] new_columns; + delete[] new_values; return 0; } From f8557063d1ab8f4cb951649436006e40a2c9556f Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Wed, 10 Jul 2024 10:40:48 -0400 Subject: [PATCH 06/18] add mention to issue for setting the expanded nnz value --- resolve/matrix/Utilities.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index 27b98f21..aa8b3c1b 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -219,7 +219,7 @@ namespace ReSolve B->setSymmetric(A->symmetric()); B->setNnz(new_rows[n_rows]); // NOTE: this is necessary because updateData always reads the current nnz from - // this field + // this field. see #176 B->setNnzExpanded(new_rows[n_rows]); B->setExpanded(true); From 87e6095b1f53eebd530271f9406bfb24d44d4d8b Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Fri, 12 Jul 2024 14:50:04 -0400 Subject: [PATCH 07/18] revise runtime complexity of the backshifting stage and remove some comments discussing possible implementation changes --- resolve/matrix/Utilities.cpp | 41 +----------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index aa8b3c1b..d542bc42 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -32,32 +32,6 @@ namespace ReSolve */ int coo2csr(matrix::Coo* A, matrix::Csr* B, memory::MemorySpace memspace) { - // NOTE: a note on deduplication: - // currently, this function allocates more memory than necessary to contain - // the matrix. this is because it's impossible to cleanly check the amount - // of space each row needs ahead of time without storing a full dense matrix - // with the dimensions of A - // - // additionally, this necessitates that we shrink the rows to remove this - // unused space, which is costly and necessitates a lot of shifting - // - // one potential solution to this that i thought of was to store an array of - // bloom filters (maybe u64s or u128s) of the size of the number of columns, - // each filter indicating if there is a value stored at that column in a row - // - // if, during the preprocessing step, we encounter a potential duplicate, we - // backtrack to see if there actually is one. given that the number of - // duplicates is probably small, this shouldn't carry too much of a - // performance penalty - // - // if performance becomes a problem, this could be coupled with arrays - // maintaining the last and/or first seen indices in the coo arrays at which - // a row had an associated value, to speed up the backtracking process - // - // this would be applied during the first allocation phase when an element - // for a row is encountered, prior to the increment of its size in - // `new_rows` - index_type* rows = A->getRowData(memory::HOST); index_type* columns = A->getColData(memory::HOST); real_type* values = A->getValues(memory::HOST); @@ -136,18 +110,6 @@ namespace ReSolve columns[i]) - new_columns); - // NOTE: the stuff beyond here is basically insertion sort. i'm not - // sure if it's the best way of going about sorting the - // column-value pairs, but it seemed to me to be the most - // natural way of going about this. the only potential - // benefit to using something else (that i can forsee) would - // be that on really large matrices with many nonzeroes in a - // row, something like mergesort might be better or maybe - // a reimpl of plain std::sort. the advantage offered by - // insertion sort here is that we can do it while we fill - // the row, as opposed to doing sorting in a postprocessing - // step as was done prior - if (new_columns[insertion_pos] == columns[i]) { new_values[insertion_pos] = values[i]; } else { @@ -186,8 +148,7 @@ namespace ReSolve } } - // backshifting stage, approximately O(n^2 * nnz) in the worst case - // (this probably isn't the tightest bound i could apply) + // backshifting stage, approximately O(nnz * m) in the worst case // // all this does is shift back rows to remove empty space in between them // by indexing each row in order, checking to see if the amount of used From 79cae170ba5c6e4f01f45e1e2dbc9687c793cd4a Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Fri, 12 Jul 2024 14:56:36 -0400 Subject: [PATCH 08/18] fix a spelling mistake within a comment --- resolve/matrix/Utilities.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index d542bc42..37ccecce 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -94,7 +94,7 @@ namespace ReSolve // fill stage, approximately O(nnz * m) in the worst case // // all this does is iterate over the nonzeroes in the coo matrix, - // check to see if a value at that colum already exists using binary search, + // check to see if a value at that column already exists using binary search, // and if it does, then insert the new value at that position (deduplicating // the matrix), otherwise, it allocates a new spot in the row (where you see // used[rows[i]]++) and shifts everything over, performing what is effectively From 553592ffd4f6c92a0e9c8076c6066d7d0aa885b9 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Mon, 15 Jul 2024 17:10:30 -0400 Subject: [PATCH 09/18] address review comments and fix tests --- resolve/matrix/Utilities.cpp | 177 ++++++++++---------- tests/unit/matrix/MatrixConversionTests.hpp | 8 +- 2 files changed, 93 insertions(+), 92 deletions(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index 37ccecce..fef42c6c 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -18,38 +18,40 @@ namespace ReSolve /** * @brief Creates a CSR from a COO matrix. * - * @param[in] A - * @param[out] B + * @param[in] A_coo + * @param[out] A_csr * @return int - Error code, 0 if successful. * - * @pre `A` is a valid sparse matrix in unordered COO format. Duplicates are allowed. - * Up-to-date values and indices must be on the host. + * @pre `A_coo` is a valid sparse matrix in unordered COO format. Duplicates are + * allowed. Up-to-date values and indices must be on the host. * - * @post `B` represents the same matrix as `A` but is in the CSR format. `B` is - * allocated and stored on the host. + * @post `A_csr` represents the same matrix as `A_coo` but is in the CSR format. + * `A_csr` is allocated and stored on the host. * - * @invariant `A` is not changed. + * @invariant `A_coo` is not changed. */ - int coo2csr(matrix::Coo* A, matrix::Csr* B, memory::MemorySpace memspace) + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) { - index_type* rows = A->getRowData(memory::HOST); - index_type* columns = A->getColData(memory::HOST); - real_type* values = A->getValues(memory::HOST); + index_type* coo_rows = A_coo->getRowData(memory::HOST); + index_type* coo_columns = A_coo->getColData(memory::HOST); + real_type* coo_values = A_coo->getValues(memory::HOST); - if (rows == nullptr || columns == nullptr || values == nullptr) { + if (coo_rows == nullptr || coo_columns == nullptr || coo_values == nullptr) { return 0; } - index_type nnz = A->getNnz(); - index_type n_rows = A->getNumRows(); - index_type* new_rows = new index_type[n_rows + 1]; - std::fill_n(new_rows, n_rows + 1, 0); + index_type nnz_with_duplicates = A_coo->getNnz(); + index_type n_rows = A_coo->getNumRows(); + index_type* csr_rows = new index_type[n_rows + 1]; + std::fill_n(csr_rows, n_rows + 1, 0); // NOTE: this is the only auxiliary storage buffer used by this conversion // function. it is first used to track the number of values on the // diagonal (if the matrix is symmetric and unexpanded), then it is - // used to track the amount of spaces used in each row's value and - // column data + // used to track the amount of elements present within each row while + // the rows are being filled. this is later used during the backshifting + // step, in which the excess space is compacted so that there is no + // left over space between each row std::unique_ptr used(new index_type[n_rows]); std::fill_n(used.get(), n_rows, 0); @@ -62,88 +64,88 @@ namespace ReSolve // and validates the triangularity. the branch is done to avoid the extra work if // it's not necessary - if (!A->symmetric() || A->expanded()) { - for (index_type i = 0; i < nnz; i++) { - new_rows[rows[i] + 1]++; + if (!A_coo->symmetric() || A_coo->expanded()) { + for (index_type i = 0; i < nnz_with_duplicates; i++) { + csr_rows[coo_rows[i] + 1]++; } } else { - bool upper_triangular = false; - for (index_type i = 0; i < nnz; i++) { - new_rows[rows[i] + 1]++; - if (rows[i] != columns[i]) { - used[columns[i]]++; + bool is_upper_triangular = false; + for (index_type i = 0; i < nnz_with_duplicates; i++) { + csr_rows[coo_rows[i] + 1]++; + if (coo_rows[i] != coo_columns[i]) { + used[coo_columns[i]]++; - if (rows[i] > columns[i] && upper_triangular) { + if (coo_rows[i] > coo_columns[i] && is_upper_triangular) { assert(false && "a matrix indicated to be symmetric triangular was not actually symmetric triangular"); return -1; } - upper_triangular = rows[i] < columns[i]; + is_upper_triangular = coo_rows[i] < coo_columns[i]; } } } for (index_type row = 0; row < n_rows; row++) { - new_rows[row + 1] += new_rows[row] + used[row]; + csr_rows[row + 1] += csr_rows[row] + used[row]; used[row] = 0; } - index_type* new_columns = new index_type[new_rows[n_rows]]; - std::fill_n(new_columns, new_rows[n_rows], -1); - real_type* new_values = new real_type[new_rows[n_rows]]; + index_type nnz_expanded_with_duplicates = csr_rows[n_rows]; + index_type* csr_columns = new index_type[nnz_expanded_with_duplicates]; + std::fill_n(csr_columns, nnz_expanded_with_duplicates, -1); + real_type* csr_values = new real_type[nnz_expanded_with_duplicates]; // fill stage, approximately O(nnz * m) in the worst case // // all this does is iterate over the nonzeroes in the coo matrix, // check to see if a value at that column already exists using binary search, - // and if it does, then insert the new value at that position (deduplicating + // and if it does, then add to the value at that position ("deduplicating" // the matrix), otherwise, it allocates a new spot in the row (where you see - // used[rows[i]]++) and shifts everything over, performing what is effectively - // insertion sort. the lower half is conditioned on the matrix being symmetric - // and stored as either upper-triangular or lower-triangular, and just - // performs the same as what is described above, but with the indices swapped. - - for (index_type i = 0; i < nnz; i++) { - index_type insertion_pos = - static_cast( - std::lower_bound(&new_columns[new_rows[rows[i]]], - &new_columns[new_rows[rows[i]] + used[rows[i]]], - columns[i]) - - new_columns); - - if (new_columns[insertion_pos] == columns[i]) { - new_values[insertion_pos] = values[i]; + // used[coo_rows[i]]++) and shifts everything over, performing what is + // effectively insertion sort. the lower half is conditioned on the matrix + // being symmetric and stored as either upper-triangular or lower-triangular, + // and just performs the same as what is described above, but with the + // indices swapped. + + for (index_type i = 0; i < nnz_with_duplicates; i++) { + index_type* closest_position = + std::lower_bound(&csr_columns[csr_rows[coo_rows[i]]], + &csr_columns[csr_rows[coo_rows[i]] + used[coo_rows[i]]], + coo_columns[i]); + index_type insertion_offset = static_cast(closest_position - csr_columns); + + if (csr_columns[insertion_offset] == coo_columns[i]) { + csr_values[insertion_offset] += coo_values[i]; } else { - for (index_type offset = new_rows[rows[i]] + used[rows[i]]++; - offset > insertion_pos; + for (index_type offset = csr_rows[coo_rows[i]] + used[coo_rows[i]]++; + offset > insertion_offset; offset--) { - std::swap(new_columns[offset], new_columns[offset - 1]); - std::swap(new_values[offset], new_values[offset - 1]); + std::swap(csr_columns[offset], csr_columns[offset - 1]); + std::swap(csr_values[offset], csr_values[offset - 1]); } - new_columns[insertion_pos] = columns[i]; - new_values[insertion_pos] = values[i]; + csr_columns[insertion_offset] = coo_columns[i]; + csr_values[insertion_offset] = coo_values[i]; } - if ((A->symmetric() && !A->expanded()) && (columns[i] != rows[i])) { - index_type mirrored_insertion_pos = - static_cast( - std::lower_bound(&new_columns[new_rows[columns[i]]], - &new_columns[new_rows[columns[i]] + used[columns[i]]], - rows[i]) - - new_columns); + if ((A_coo->symmetric() && !A_coo->expanded()) && (coo_columns[i] != coo_rows[i])) { + index_type* mirrored_closest_position = + std::lower_bound(&csr_columns[csr_rows[coo_columns[i]]], + &csr_columns[csr_rows[coo_columns[i]] + used[coo_columns[i]]], + coo_rows[i]); + index_type mirrored_insertion_offset = static_cast(mirrored_closest_position - csr_columns); - if (new_columns[mirrored_insertion_pos] == rows[i]) { - new_values[mirrored_insertion_pos] = values[i]; + if (csr_columns[mirrored_insertion_offset] == coo_rows[i]) { + csr_values[mirrored_insertion_offset] += coo_values[i]; } else { - for (index_type offset = new_rows[columns[i]] + used[columns[i]]++; - offset > mirrored_insertion_pos; + for (index_type offset = csr_rows[coo_columns[i]] + used[coo_columns[i]]++; + offset > mirrored_insertion_offset; offset--) { - std::swap(new_columns[offset], new_columns[offset - 1]); - std::swap(new_values[offset], new_values[offset - 1]); + std::swap(csr_columns[offset], csr_columns[offset - 1]); + std::swap(csr_values[offset], csr_values[offset - 1]); } - new_columns[mirrored_insertion_pos] = rows[i]; - new_values[mirrored_insertion_pos] = values[i]; + csr_columns[mirrored_insertion_offset] = coo_rows[i]; + csr_values[mirrored_insertion_offset] = coo_values[i]; } } } @@ -156,46 +158,47 @@ namespace ReSolve // shifts every subsequent row back the amount of unused spaces for (index_type row = 0; row < n_rows - 1; row++) { - index_type row_nnz = new_rows[row + 1] - new_rows[row]; + index_type row_nnz = csr_rows[row + 1] - csr_rows[row]; if (used[row] != row_nnz) { index_type correction = row_nnz - used[row]; for (index_type corrected_row = row + 1; corrected_row < n_rows; corrected_row++) { - for (index_type offset = new_rows[corrected_row]; - offset < new_rows[corrected_row + 1]; + for (index_type offset = csr_rows[corrected_row]; + offset < csr_rows[corrected_row + 1]; offset++) { - new_columns[offset - correction] = new_columns[offset]; - new_values[offset - correction] = new_values[offset]; + csr_columns[offset - correction] = csr_columns[offset]; + csr_values[offset - correction] = csr_values[offset]; } - new_rows[corrected_row] -= correction; + csr_rows[corrected_row] -= correction; } - new_rows[n_rows] -= correction; + csr_rows[n_rows] -= correction; } } - B->setSymmetric(A->symmetric()); - B->setNnz(new_rows[n_rows]); + index_type nnz_expanded_without_duplicates = csr_rows[n_rows]; + A_csr->setSymmetric(A_coo->symmetric()); + A_csr->setNnz(nnz_expanded_without_duplicates); // NOTE: this is necessary because updateData always reads the current nnz from // this field. see #176 - B->setNnzExpanded(new_rows[n_rows]); - B->setExpanded(true); + A_csr->setNnzExpanded(nnz_expanded_without_duplicates); + A_csr->setExpanded(true); - if (B->updateData(new_rows, new_columns, new_values, memory::HOST, memspace) != 0) { - delete[] new_rows; - delete[] new_columns; - delete[] new_values; + if (A_csr->updateData(csr_rows, csr_columns, csr_values, memory::HOST, memspace) != 0) { + delete[] csr_rows; + delete[] csr_columns; + delete[] csr_values; assert(false && "invalid state after coo -> csr conversion"); return -1; } - delete[] new_rows; - delete[] new_columns; - delete[] new_values; + delete[] csr_rows; + delete[] csr_columns; + delete[] csr_values; return 0; } diff --git a/tests/unit/matrix/MatrixConversionTests.hpp b/tests/unit/matrix/MatrixConversionTests.hpp index 9fe5cc50..d7f69a69 100644 --- a/tests/unit/matrix/MatrixConversionTests.hpp +++ b/tests/unit/matrix/MatrixConversionTests.hpp @@ -142,7 +142,7 @@ namespace ReSolve const index_type simple_symmetric_expected_nnz_ = 8; index_type simple_symmetric_expected_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; index_type simple_symmetric_expected_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; - real_type simple_symmetric_expected_a_[8] = {2.0, 4.0, 6.0, 7.0, 6.0, 7.0, 8.0, 8.0}; + real_type simple_symmetric_expected_a_[8] = {3.0, 7.0, 11.0, 7.0, 11.0, 7.0, 8.0, 8.0}; const index_type simple_upper_unexpanded_symmetric_n_ = 5; const index_type simple_upper_unexpanded_symmetric_m_ = 5; @@ -176,7 +176,7 @@ namespace ReSolve const index_type simple_asymmetric_expected_nnz_ = 8; index_type simple_asymmetric_expected_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; index_type simple_asymmetric_expected_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; - real_type simple_asymmetric_expected_a_[8] = {2.0, 4.0, 6.0, 7.0, 9.0, 6.0, 8.0, 8.0}; + real_type simple_asymmetric_expected_a_[8] = {2.0, 4.0, 11.0, 7.0, 9.0, 6.0, 15.0, 8.0}; bool verifyAnswer(matrix::Csr* A, const index_type& n, @@ -197,9 +197,7 @@ namespace ReSolve index_type answer_offset = 0; for (index_type i = 0; i < A->getNumRows(); i++) { for (index_type offset = rows[i]; offset < rows[i + 1]; offset++) { - if (i != is[answer_offset] || - columns[offset] != js[answer_offset] || - !isEqual(values[offset], as[answer_offset])) { + if (i != is[answer_offset] || columns[offset] != js[answer_offset] || !isEqual(values[offset], as[answer_offset])) { return false; } answer_offset++; From 9101b7bee9df989848f99f178b09afc4144ad159 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Mon, 15 Jul 2024 19:06:51 -0400 Subject: [PATCH 10/18] some additional inline documentation --- resolve/matrix/Utilities.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index fef42c6c..9fb269bb 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -107,10 +107,14 @@ namespace ReSolve // indices swapped. for (index_type i = 0; i < nnz_with_duplicates; i++) { + // this points to the first element not less than `coo_columns[i]`. see + // https://en.cppreference.com/w/cpp/algorithm/lower_bound for more details index_type* closest_position = std::lower_bound(&csr_columns[csr_rows[coo_rows[i]]], &csr_columns[csr_rows[coo_rows[i]] + used[coo_rows[i]]], coo_columns[i]); + + // this is the offset at which the element's value belongs index_type insertion_offset = static_cast(closest_position - csr_columns); if (csr_columns[insertion_offset] == coo_columns[i]) { From 5e983bf63d2667031da607655120c990c757a4f4 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Mon, 15 Jul 2024 19:33:14 -0400 Subject: [PATCH 11/18] fix triangularity verification --- resolve/matrix/Utilities.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index 9fb269bb..7810ccd7 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -70,16 +70,18 @@ namespace ReSolve } } else { bool is_upper_triangular = false; + bool is_lower_triangular = false; for (index_type i = 0; i < nnz_with_duplicates; i++) { csr_rows[coo_rows[i] + 1]++; if (coo_rows[i] != coo_columns[i]) { used[coo_columns[i]]++; - if (coo_rows[i] > coo_columns[i] && is_upper_triangular) { + is_upper_triangular |= coo_rows[i] < coo_columns[i]; + is_lower_triangular |= coo_rows[i] > coo_columns[i]; + if (is_upper_triangular && is_lower_triangular) { assert(false && "a matrix indicated to be symmetric triangular was not actually symmetric triangular"); return -1; } - is_upper_triangular = coo_rows[i] < coo_columns[i]; } } } From 3f3b271d6b2e3fdf6a6a8b4b46d31d6018805301 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Tue, 23 Jul 2024 13:45:01 -0400 Subject: [PATCH 12/18] coo2coo implementation, minus tests --- resolve/matrix/Utilities.cpp | 222 +++++++++++++++++++++++++++++++++-- resolve/matrix/Utilities.hpp | 3 + 2 files changed, 214 insertions(+), 11 deletions(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index 7810ccd7..aef25435 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -16,10 +16,210 @@ namespace ReSolve namespace matrix { /** - * @brief Creates a CSR from a COO matrix. + * @param[in] A + * @param[out] B + * @param[in] memspace + * @return int - Error code, 0 if successful * + * @pre `A` is a valid sparse matrix in an assumed to be unordered COO format. + * Duplicates are permitted, and the values and indices on the host must + * be up to date + * + * @post `B` is semantically the same matrix as `A`, but has been deduplicated, + * has been expanded if `A` was symmetric and either upper or lower + * triangular, and has had its triplets sorted in a "column-major" manner + * + * @invariant `A` is unchanged + */ + int coo2coo(matrix::Coo* A, matrix::Coo* B, memory::MemorySpace memspace) + { + index_type* a_rows = A->getRowData(memory::HOST); + index_type* a_columns = A->getColData(memory::HOST); + real_type* a_values = A->getValues(memory::HOST); + + if (a_rows == nullptr || a_columns == nullptr || a_values == nullptr) { + return 0; + } + + index_type nnz_with_duplicates = A->getNnz(); + index_type n_columns = A->getNumColumns(); + + // NOTE: auxiliary memory that is first used to track the amount + // of off-diagonal elements to perform space allocation when + // the matrix is symmetric and unexpanded, then is used to + // track the amount of space used in each column to allow + // out of order filling of values + std::unique_ptr used(new index_type[n_columns]); + std::fill_n(used.get(), n_columns, 0); + + // NOTE: column partitions of the destination matrices, used + // to allow out of order filling of values (as is done + // during expansion of the input matrix) + std::unique_ptr partitions(new index_type[n_columns + 1]); + std::fill_n(partitions.get(), n_columns + 1, 0); + + // allocation stage, the first loop is O(nnz) always and the second is O(n) + // + // computes the nnz of the destination matrix and allocates space for each + // column. additionally, the upper or lower diagonality is checked if the + // input matrix is symmetric and unexpanded + + if (!A->symmetric() || A->expanded()) { + for (index_type i = 0; i < nnz_with_duplicates; i++) { + partitions[a_columns[i] + 1]++; + } + } else { + bool is_upper_triangular = false; + bool is_lower_triangular = false; + + for (index_type i = 0; i < nnz_with_duplicates; i++) { + partitions[a_columns[i] + 1]++; + + if (a_rows[i] != a_columns[i]) { + used[a_rows[i]]++; + + is_upper_triangular |= a_rows[i] < a_columns[i]; + is_lower_triangular |= a_rows[i] > a_columns[i]; + if (is_upper_triangular && is_lower_triangular) { + assert(false && "a matrix indicated to be symmetric triangular was not actually symmetric triangular"); + return -1; + } + } + } + } + + for (index_type column = 0; column < n_columns; column++) { + partitions[column + 1] += partitions[column] + used[column]; + used[column] = 0; + } + + index_type new_nnz_with_duplicates = partitions[n_columns]; + index_type* b_rows = new index_type[new_nnz_with_duplicates]; + std::fill_n(b_rows, new_nnz_with_duplicates, -1); + index_type* b_columns = new index_type[new_nnz_with_duplicates]; + real_type* b_values = new real_type[new_nnz_with_duplicates]; + + // fill stage, approximately O(nnz * n) in the worst case + // + // all this does is iterate over the nonzeroes in the input matrix, + // check to see if a value at that column already exists using binary search, + // and if it does, then add to the value at that position, deduplicating the + // matrix. otherwise, it allocates a new spot in the row (where you see + // used[a_rows[i]]++) and shifts everything over, performing what is + // effectively insertion sort. the lower half is conditioned on the matrix + // being symmetric and stored as either upper-triangular or lower-triangular, + // and just performs the same as what is described above, but with the + // indices swapped. + + for (index_type i = 0; i < nnz_with_duplicates; i++) { + // this points to the first element not less than `a_rows[i]`. see + // https://en.cppreference.com/w/cpp/algorithm/lower_bound for more details + index_type* closest_position = + std::lower_bound(&b_rows[partitions[a_columns[i]]], + &b_rows[partitions[a_columns[i]] + used[a_columns[i]]], + a_rows[i]); + + // this is the offset at which the element's value belongs + index_type insertion_offset = static_cast(closest_position - b_rows); + + if (b_rows[insertion_offset] == a_rows[i]) { + b_values[insertion_offset] += a_values[i]; + } else { + for (index_type offset = partitions[a_columns[i]] + used[a_columns[i]]++; + offset > insertion_offset; + offset--) { + std::swap(b_rows[offset], b_rows[offset - 1]); + std::swap(b_columns[offset], b_columns[offset - 1]); + std::swap(b_values[offset], b_values[offset - 1]); + } + + b_rows[insertion_offset] = a_rows[i]; + b_columns[insertion_offset] = a_columns[i]; + b_values[insertion_offset] = a_values[i]; + } + + if ((A->symmetric() && !A->expanded()) && (a_columns[i] != a_rows[i])) { + index_type* mirrored_closest_position = + std::lower_bound(&b_rows[partitions[a_rows[i]]], + &b_rows[partitions[a_rows[i]] + used[a_rows[i]]], + a_columns[i]); + index_type mirrored_insertion_offset = static_cast(mirrored_closest_position - b_rows); + + if (b_rows[mirrored_insertion_offset] == a_columns[i]) { + b_values[mirrored_insertion_offset] += a_values[i]; + } else { + for (index_type offset = partitions[a_rows[i]] + used[a_rows[i]]++; + offset > mirrored_insertion_offset; + offset--) { + std::swap(b_rows[offset], b_rows[offset - 1]); + std::swap(b_columns[offset], b_columns[offset - 1]); + std::swap(b_values[offset], b_values[offset - 1]); + } + + b_rows[mirrored_insertion_offset] = a_columns[i]; + b_columns[mirrored_insertion_offset] = a_rows[i]; + b_values[mirrored_insertion_offset] = a_values[i]; + } + } + } + + // backshifting stage, approximately O(nnz * m) in the worst case + // + // all this does is shift back rows to remove empty space in between them + // by indexing each row in order, checking to see if the amount of used + // spaces is equivalent to the amount of nonzeroes in the row, and if not, + // shifts every subsequent row back the amount of unused spaces + + for (index_type column = 0; column < n_columns - 1; column++) { + index_type column_nnz = partitions[column + 1] - partitions[column]; + if (used[column] != column_nnz) { + index_type correction = column_nnz - used[column]; + + for (index_type corrected_column = column + 1; + corrected_column < n_columns; + corrected_column++) { + for (index_type offset = partitions[corrected_column]; + offset < partitions[corrected_column + 1]; + offset++) { + b_rows[offset - correction] = b_rows[offset]; + b_columns[offset - correction] = b_columns[offset]; + b_values[offset - correction] = b_values[offset]; + } + + partitions[corrected_column] -= correction; + } + + partitions[n_columns] -= correction; + } + } + + index_type new_nnz_without_duplicates = partitions[n_columns]; + B->setSymmetric(A->symmetric()); + B->setNnz(new_nnz_without_duplicates); + // NOTE: see the comment in coo2csr for why this is necessary + B->setNnzExpanded(new_nnz_without_duplicates); + B->setExpanded(true); + + if (B->updateData(b_rows, b_columns, b_values, memory::HOST, memspace) != 0) { + delete[] b_rows; + delete[] b_columns; + delete[] b_values; + + assert(false && "invalid state after coo -> coo conversion"); + return -1; + } + + delete[] b_rows; + delete[] b_columns; + delete[] b_values; + + return 0; + } + + /** * @param[in] A_coo * @param[out] A_csr + * @param[in] memspace * @return int - Error code, 0 if successful. * * @pre `A_coo` is a valid sparse matrix in unordered COO format. Duplicates are @@ -46,7 +246,7 @@ namespace ReSolve std::fill_n(csr_rows, n_rows + 1, 0); // NOTE: this is the only auxiliary storage buffer used by this conversion - // function. it is first used to track the number of values on the + // function. it is first used to track the number of values off the // diagonal (if the matrix is symmetric and unexpanded), then it is // used to track the amount of elements present within each row while // the rows are being filled. this is later used during the backshifting @@ -91,17 +291,17 @@ namespace ReSolve used[row] = 0; } - index_type nnz_expanded_with_duplicates = csr_rows[n_rows]; - index_type* csr_columns = new index_type[nnz_expanded_with_duplicates]; - std::fill_n(csr_columns, nnz_expanded_with_duplicates, -1); - real_type* csr_values = new real_type[nnz_expanded_with_duplicates]; + index_type new_nnz_with_duplicates = csr_rows[n_rows]; + index_type* csr_columns = new index_type[new_nnz_with_duplicates]; + std::fill_n(csr_columns, new_nnz_with_duplicates, -1); + real_type* csr_values = new real_type[new_nnz_with_duplicates]; // fill stage, approximately O(nnz * m) in the worst case // // all this does is iterate over the nonzeroes in the coo matrix, // check to see if a value at that column already exists using binary search, - // and if it does, then add to the value at that position ("deduplicating" - // the matrix), otherwise, it allocates a new spot in the row (where you see + // and if it does, then add to the value at that position, deduplicating + // the matrix. otherwise, it allocates a new spot in the row (where you see // used[coo_rows[i]]++) and shifts everything over, performing what is // effectively insertion sort. the lower half is conditioned on the matrix // being symmetric and stored as either upper-triangular or lower-triangular, @@ -185,12 +385,12 @@ namespace ReSolve } } - index_type nnz_expanded_without_duplicates = csr_rows[n_rows]; + index_type new_nnz_without_duplicates = csr_rows[n_rows]; A_csr->setSymmetric(A_coo->symmetric()); - A_csr->setNnz(nnz_expanded_without_duplicates); + A_csr->setNnz(new_nnz_without_duplicates); // NOTE: this is necessary because updateData always reads the current nnz from // this field. see #176 - A_csr->setNnzExpanded(nnz_expanded_without_duplicates); + A_csr->setNnzExpanded(new_nnz_without_duplicates); A_csr->setExpanded(true); if (A_csr->updateData(csr_rows, csr_columns, csr_values, memory::HOST, memspace) != 0) { diff --git a/resolve/matrix/Utilities.hpp b/resolve/matrix/Utilities.hpp index ecaee79e..285f054b 100644 --- a/resolve/matrix/Utilities.hpp +++ b/resolve/matrix/Utilities.hpp @@ -10,6 +10,9 @@ namespace ReSolve class Coo; class Csr; + /// @brief Performs various cleanup operations on a COO matrix + int coo2coo(matrix::Coo* A, matrix::Coo* B, memory::MemorySpace memspace); + /// @brief Converts symmetric or general COO to general CSR matrix int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace); } From 617404e75e62a1cc29ac13ac2d6ffe44465972d2 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Tue, 23 Jul 2024 15:53:46 -0400 Subject: [PATCH 13/18] tests for coo2coo and a fix for a bug in coo2csr and coo2coo --- resolve/matrix/Utilities.cpp | 4 +- tests/unit/matrix/MatrixConversionTests.hpp | 145 ++++++++++++++++---- 2 files changed, 117 insertions(+), 32 deletions(-) diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp index aef25435..9e60034a 100644 --- a/resolve/matrix/Utilities.cpp +++ b/resolve/matrix/Utilities.cpp @@ -170,7 +170,7 @@ namespace ReSolve // spaces is equivalent to the amount of nonzeroes in the row, and if not, // shifts every subsequent row back the amount of unused spaces - for (index_type column = 0; column < n_columns - 1; column++) { + for (index_type column = 0; column < n_columns; column++) { index_type column_nnz = partitions[column + 1] - partitions[column]; if (used[column] != column_nnz) { index_type correction = column_nnz - used[column]; @@ -363,7 +363,7 @@ namespace ReSolve // spaces is equivalent to the amount of nonzeroes in the row, and if not, // shifts every subsequent row back the amount of unused spaces - for (index_type row = 0; row < n_rows - 1; row++) { + for (index_type row = 0; row < n_rows; row++) { index_type row_nnz = csr_rows[row + 1] - csr_rows[row]; if (used[row] != row_nnz) { index_type correction = row_nnz - used[row]; diff --git a/tests/unit/matrix/MatrixConversionTests.hpp b/tests/unit/matrix/MatrixConversionTests.hpp index d7f69a69..cc66cf6a 100644 --- a/tests/unit/matrix/MatrixConversionTests.hpp +++ b/tests/unit/matrix/MatrixConversionTests.hpp @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -44,12 +46,23 @@ namespace ReSolve status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; status *= this->verifyAnswer(&B, - simple_symmetric_expected_n_, - simple_symmetric_expected_m_, - simple_symmetric_expected_nnz_, - simple_symmetric_expected_i_, - simple_symmetric_expected_j_, - simple_symmetric_expected_a_); + simple_symmetric_expected_csr_n_, + simple_symmetric_expected_csr_m_, + simple_symmetric_expected_csr_nnz_, + simple_symmetric_expected_csr_i_, + simple_symmetric_expected_csr_j_, + simple_symmetric_expected_csr_a_); + + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_symmetric_expected_coo_col_n_, + simple_symmetric_expected_coo_col_m_, + simple_symmetric_expected_coo_col_nnz_, + simple_symmetric_expected_coo_col_i_, + simple_symmetric_expected_coo_col_j_, + simple_symmetric_expected_coo_col_a_); return status.report(__func__); } @@ -72,12 +85,23 @@ namespace ReSolve status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; status *= this->verifyAnswer(&B, - simple_symmetric_expected_n_, - simple_symmetric_expected_m_, - simple_symmetric_expected_nnz_, - simple_symmetric_expected_i_, - simple_symmetric_expected_j_, - simple_symmetric_expected_a_); + simple_symmetric_expected_csr_n_, + simple_symmetric_expected_csr_m_, + simple_symmetric_expected_csr_nnz_, + simple_symmetric_expected_csr_i_, + simple_symmetric_expected_csr_j_, + simple_symmetric_expected_csr_a_); + + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_symmetric_expected_coo_col_n_, + simple_symmetric_expected_coo_col_m_, + simple_symmetric_expected_coo_col_nnz_, + simple_symmetric_expected_coo_col_i_, + simple_symmetric_expected_coo_col_j_, + simple_symmetric_expected_coo_col_a_); return status.report(__func__); } @@ -107,6 +131,17 @@ namespace ReSolve simple_main_diagonal_only_i_j_, simple_main_diagonal_only_a_); + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_main_diagonal_only_n_, + simple_main_diagonal_only_m_, + simple_main_diagonal_only_nnz_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_a_); + return status.report(__func__); } @@ -126,23 +161,41 @@ namespace ReSolve status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; status *= this->verifyAnswer(&B, - simple_asymmetric_expected_n_, - simple_asymmetric_expected_m_, - simple_asymmetric_expected_nnz_, - simple_asymmetric_expected_i_, - simple_asymmetric_expected_j_, - simple_asymmetric_expected_a_); + simple_asymmetric_expected_csr_n_, + simple_asymmetric_expected_csr_m_, + simple_asymmetric_expected_csr_nnz_, + simple_asymmetric_expected_csr_i_, + simple_asymmetric_expected_csr_j_, + simple_asymmetric_expected_csr_a_); + + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_asymmetric_expected_coo_col_n_, + simple_asymmetric_expected_coo_col_m_, + simple_asymmetric_expected_coo_col_nnz_, + simple_asymmetric_expected_coo_col_i_, + simple_asymmetric_expected_coo_col_j_, + simple_asymmetric_expected_coo_col_a_); return status.report(__func__); } private: - const index_type simple_symmetric_expected_n_ = 5; - const index_type simple_symmetric_expected_m_ = 5; - const index_type simple_symmetric_expected_nnz_ = 8; - index_type simple_symmetric_expected_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; - index_type simple_symmetric_expected_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; - real_type simple_symmetric_expected_a_[8] = {3.0, 7.0, 11.0, 7.0, 11.0, 7.0, 8.0, 8.0}; + const index_type simple_symmetric_expected_csr_n_ = 5; + const index_type simple_symmetric_expected_csr_m_ = 5; + const index_type simple_symmetric_expected_csr_nnz_ = 8; + index_type simple_symmetric_expected_csr_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + index_type simple_symmetric_expected_csr_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + real_type simple_symmetric_expected_csr_a_[8] = {3.0, 7.0, 11.0, 7.0, 11.0, 7.0, 8.0, 8.0}; + + const index_type simple_symmetric_expected_coo_col_n_ = 5; + const index_type simple_symmetric_expected_coo_col_m_ = 5; + const index_type simple_symmetric_expected_coo_col_nnz_ = 8; + index_type simple_symmetric_expected_coo_col_i_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + index_type simple_symmetric_expected_coo_col_j_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + real_type simple_symmetric_expected_coo_col_a_[8] = {3.0, 7.0, 11.0, 7.0, 11.0, 7.0, 8.0, 8.0}; const index_type simple_upper_unexpanded_symmetric_n_ = 5; const index_type simple_upper_unexpanded_symmetric_m_ = 5; @@ -171,12 +224,19 @@ namespace ReSolve index_type simple_asymmetric_j_[10] = {0, 1, 3, 1, 1, 4, 4, 3, 2, 2}; real_type simple_asymmetric_a_[10] = {2.0, 4.0, 7.0, 9.0, 6.0, 7.0, 8.0, 8.0, 5.0, 6.0}; - const index_type simple_asymmetric_expected_n_ = 5; - const index_type simple_asymmetric_expected_m_ = 5; - const index_type simple_asymmetric_expected_nnz_ = 8; - index_type simple_asymmetric_expected_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; - index_type simple_asymmetric_expected_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; - real_type simple_asymmetric_expected_a_[8] = {2.0, 4.0, 11.0, 7.0, 9.0, 6.0, 15.0, 8.0}; + const index_type simple_asymmetric_expected_csr_n_ = 5; + const index_type simple_asymmetric_expected_csr_m_ = 5; + const index_type simple_asymmetric_expected_csr_nnz_ = 8; + index_type simple_asymmetric_expected_csr_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + index_type simple_asymmetric_expected_csr_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + real_type simple_asymmetric_expected_csr_a_[8] = {2.0, 4.0, 11.0, 7.0, 9.0, 6.0, 15.0, 8.0}; + + const index_type simple_asymmetric_expected_coo_col_n_ = 5; + const index_type simple_asymmetric_expected_coo_col_m_ = 5; + const index_type simple_asymmetric_expected_coo_col_nnz_ = 8; + index_type simple_asymmetric_expected_coo_col_i_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + index_type simple_asymmetric_expected_coo_col_j_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + real_type simple_asymmetric_expected_coo_col_a_[8] = {2.0, 4.0, 9.0, 6.0, 11.0, 7.0, 8.0, 15.0}; bool verifyAnswer(matrix::Csr* A, const index_type& n, @@ -206,6 +266,31 @@ namespace ReSolve return true; } + + bool verifyAnswer(matrix::Coo* A, + const index_type& n, + const index_type& m, + const index_type& nnz, + index_type* is, + index_type* js, + real_type* as) + { + if (n != A->getNumRows() || m != A->getNumColumns() || nnz != A->getNnz()) { + return false; + } + + index_type* rows = A->getRowData(memory::HOST); + index_type* columns = A->getColData(memory::HOST); + real_type* values = A->getValues(memory::HOST); + + for (index_type i = 0; i < nnz; i++) { + if (rows[i] != is[i] || columns[i] != js[i] || !isEqual(values[i], as[i])) { + return false; + } + } + + return true; + } }; } // namespace tests } // namespace ReSolve From a645211ca27d11a1b47a8c8ee7b349faecd8da8e Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Thu, 11 Jul 2024 13:59:07 -0400 Subject: [PATCH 14/18] initial implementation of a basic lusol example program --- examples/CMakeLists.txt | 34 +++--- examples/r_LUSOL_LUSOL.cpp | 180 +++++++++++++++++++++++++++++++ resolve/LinSolverDirectLUSOL.cpp | 88 +++++++++++---- 3 files changed, 267 insertions(+), 35 deletions(-) create mode 100644 examples/r_LUSOL_LUSOL.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9905a8e1..9eee8a53 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -21,6 +21,12 @@ if(RESOLVE_USE_KLU) target_link_libraries(system.exe PRIVATE ReSolve) endif(RESOLVE_USE_KLU) +if(RESOLVE_USE_LUSOL) + # Basic LUSOL example without any refactorization + add_executable(lusol_lusol.exe r_LUSOL_LUSOL.cpp) + target_link_libraries(lusol_lusol.exe PRIVATE ReSolve) +endif() + # Build rand example, CPU ONLY r_randGMRES_cpu add_executable(gmres_cpu_rand.exe r_randGMRES_cpu.cpp) @@ -34,23 +40,23 @@ if(RESOLVE_USE_CUDA) # Build example with KLU factorization and GLU refactorization add_executable(klu_glu.exe r_KLU_GLU.cpp) target_link_libraries(klu_glu.exe PRIVATE ReSolve) - + # Build example with KLU factorization and Rf refactorization add_executable(klu_rf.exe r_KLU_rf.cpp) target_link_libraries(klu_rf.exe PRIVATE ReSolve) - + # Build example with KLU factorization, Rf refactorization, and FGMRES iterative refinement add_executable(klu_rf_fgmres.exe r_KLU_rf_FGMRES.cpp) target_link_libraries(klu_rf_fgmres.exe PRIVATE ReSolve) - + # Build example where matrix is factorized once, refactorized once and then the preconditioner is REUSED add_executable(klu_rf_fgmres_reuse_refactorization.exe r_KLU_rf_FGMRES_reuse_factorization.cpp) target_link_libraries(klu_rf_fgmres_reuse_refactorization.exe PRIVATE ReSolve) - - # Build example where matrix data is updated + + # Build example where matrix data is updated add_executable(klu_glu_values_update.exe r_KLU_GLU_matrix_values_update.cpp) target_link_libraries(klu_glu_values_update.exe PRIVATE ReSolve) - + # Build example with a configurable system solver add_executable(system_cuda.exe r_SysSolverCuda.cpp) target_link_libraries(system_cuda.exe PRIVATE ReSolve) @@ -75,25 +81,25 @@ if(RESOLVE_USE_HIP) # Build example with KLU factorization and rocsolver Rf refactorization add_executable(klu_rocsolverrf.exe r_KLU_rocsolverrf.cpp) target_link_libraries(klu_rocsolverrf.exe PRIVATE ReSolve) - + # Build example with KLU factorization, rocsolver Rf refactorization, and FGMRES iterative refinement add_executable(klu_rocsolverrf_fgmres.exe r_KLU_rocSolverRf_FGMRES.cpp) target_link_libraries(klu_rocsolverrf_fgmres.exe PRIVATE ReSolve) - + # Example in which factorization is redone if solution is bad add_executable(klu_rocsolverrf_check_redo.exe r_KLU_rocsolverrf_redo_factorization.cpp) target_link_libraries(klu_rocsolverrf_check_redo.exe PRIVATE ReSolve) - + # Build example with a configurable system solver add_executable(system_hip.exe r_SysSolverHip.cpp) target_link_libraries(system_hip.exe PRIVATE ReSolve) - + # Build example with a configurable system solver add_executable(system_hip_fgmres.exe r_SysSolverHipRefine.cpp) target_link_libraries(system_hip_fgmres.exe PRIVATE ReSolve) endif(RESOLVE_USE_KLU) - + # Rand GMRES test with rocsparse add_executable(gmres_rocsparse_rand.exe r_randGMRES.cpp) target_link_libraries(gmres_rocsparse_rand.exe PRIVATE ReSolve) @@ -121,9 +127,9 @@ if(RESOLVE_USE_HIP) list(APPEND installable_executables gmres_rocsparse_rand.exe) endif(RESOLVE_USE_HIP) - list(APPEND installable_executables gmres_cpu_rand.exe) + list(APPEND installable_executables gmres_cpu_rand.exe) -install(TARGETS ${installable_executables} +install(TARGETS ${installable_executables} RUNTIME DESTINATION bin) # Path where the consumer test code will be installed @@ -147,7 +153,7 @@ endif() install(DIRECTORY resolve_consumer DESTINATION share/examples) install(FILES ${PROJECT_SOURCE_DIR}/tests/functionality/${RESOLVE_CONSUMER_APP} DESTINATION share/examples/resolve_consumer RENAME consumer.cpp) -# Shell script argumets: +# Shell script argumets: # 1. Path to where resolve is installed. # 2. Path to data directory add_custom_target(test_install COMMAND ${CONSUMER_PATH}/test.sh ${CMAKE_INSTALL_PREFIX} ${PROJECT_SOURCE_DIR}/tests/functionality/) diff --git a/examples/r_LUSOL_LUSOL.cpp b/examples/r_LUSOL_LUSOL.cpp new file mode 100644 index 00000000..c55094af --- /dev/null +++ b/examples/r_LUSOL_LUSOL.cpp @@ -0,0 +1,180 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace ReSolve::constants; +using index_type = ReSolve::index_type; +using real_type = ReSolve::real_type; +using vector_type = ReSolve::vector::Vector; + +int specializedMatvec(ReSolve::matrix::Coo* Ageneric, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) +{ + + ReSolve::matrix::Coo* A = static_cast(Ageneric); + index_type* rows = A->getRowData(ReSolve::memory::HOST); + index_type* columns = A->getColData(ReSolve::memory::HOST); + real_type* values = A->getValues(ReSolve::memory::HOST); + + real_type* xs = vec_x->getData(ReSolve::memory::HOST); + real_type* rs = vec_result->getData(ReSolve::memory::HOST); + + index_type n_rows = A->getNumRows(); + + std::unique_ptr sums(new real_type[n_rows]); + std::fill_n(sums.get(), n_rows, 0); + + std::unique_ptr compensations(new real_type[n_rows]); + std::fill_n(compensations.get(), n_rows, 0); + + real_type y, t; + + for (index_type i = 0; i < A->getNnz(); i++) { + y = (values[i] * xs[columns[i]]) - compensations[rows[i]]; + t = sums[rows[i]] + y; + compensations[rows[i]] = t - sums[rows[i]] - y; + sums[rows[i]] = t; + } + + for (index_type i = 0; i < n_rows; i++) { + sums[i] *= *alpha; + rs[i] = (rs[i] * *beta) + sums[i]; + } + + vec_result->setDataUpdated(ReSolve::memory::HOST); + return 0; +} + +int main(int argc, char* argv[]) +{ + + if (argc <= 4) { + std::cerr << "usage: lusol_lusol.exe MATRIX_FILE RHS_FILE NUM_SYSTEMS [MATRIX_FILE_ID RHS_FILE_ID...]" << std::endl; + return 1; + } + + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; + + index_type numSystems = atoi(argv[3]); + if (numSystems == 0) { + return 0; + } + + std::cout << "Family mtx file name: " << matrixFileName << ", total number of matrices: " << numSystems << std::endl; + std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl; + + std::string fileId; + std::string rhsId; + std::string matrixFileNameFull; + std::string rhsFileNameFull; + + std::unique_ptr workspace(new ReSolve::LinAlgWorkspaceCpu()); + std::unique_ptr matrix_handler(new ReSolve::MatrixHandler(workspace.get())); + std::unique_ptr vector_handler(new ReSolve::VectorHandler(workspace.get())); + real_type norm_A, norm_x, norm_r; // used for INF norm + + std::unique_ptr lusol(new ReSolve::LinSolverDirectLUSOL); + + ReSolve::matrix::Coo* A; + real_type* rhs; + vector_type *vec_rhs, *vec_r, *vec_x; + + for (int i = 0; i < numSystems; ++i) { + index_type j = 4 + i * 2; + fileId = argv[j]; + rhsId = argv[j + 1]; + + matrixFileNameFull = ""; + rhsFileNameFull = ""; + + matrixFileNameFull = matrixFileName + fileId + ".mtx"; + rhsFileNameFull = rhsFileName + rhsId + ".mtx"; + std::cout << "Reading: " << matrixFileNameFull << std::endl; + + std::ifstream mat_file(matrixFileNameFull); + if (!mat_file.is_open()) { + std::cout << "Failed to open file " << matrixFileNameFull << "\n"; + return -1; + } + std::ifstream rhs_file(rhsFileNameFull); + if (!rhs_file.is_open()) { + std::cout << "Failed to open file " << rhsFileNameFull << "\n"; + return -1; + } + if (i == 0) { + A = ReSolve::io::readMatrixFromFile(mat_file); + rhs = ReSolve::io::readRhsFromFile(rhs_file); + vec_rhs = new vector_type(A->getNumRows()); + vec_r = new vector_type(A->getNumRows()); + + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); + } else { + ReSolve::io::readAndUpdateMatrix(mat_file, A); + ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); + } + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() + << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; + mat_file.close(); + rhs_file.close(); + + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + ReSolve::matrix::expand(*A); + std::cout << "Matrix expansion completed. Expanded NNZ: " << A->getNnzExpanded() << std::endl; + // Now call direct solver + int status; + + lusol->setup(A); + status = lusol->analyze(); + std::cout << "LUSOL analysis status: " << status << std::endl; + status = lusol->factorize(); + std::cout << "LUSOL factorization status: " << status << std::endl; + status = lusol->solve(vec_rhs, vec_x); + std::cout << "LUSOL solve status: " << status << std::endl; + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + + matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); + + specializedMatvec(A, vec_x, vec_r, &ONE, &MINUSONE); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::HOST); + + std::cout << "\t2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)) << "\n"; + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::HOST); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::HOST); + std::cout << "\tMatrix inf norm: " << std::scientific << std::setprecision(16) << norm_A << "\n" + << "\tResidual inf norm: " << norm_r << "\n" + << "\tSolution inf norm: " << norm_x << "\n" + << "\tNorm of scaled residuals: " << norm_r / (norm_A * norm_x) << "\n"; + } + + delete A; + delete[] rhs; + delete vec_r; + delete vec_x; + delete vec_rhs; + + return 0; +} diff --git a/resolve/LinSolverDirectLUSOL.cpp b/resolve/LinSolverDirectLUSOL.cpp index a8c579d3..f2064ad8 100644 --- a/resolve/LinSolverDirectLUSOL.cpp +++ b/resolve/LinSolverDirectLUSOL.cpp @@ -1,7 +1,7 @@ +#include #include #include #include -#include #include "LinSolverDirectLUSOL.hpp" #include "lusol/lusol.hpp" @@ -85,14 +85,14 @@ namespace ReSolve /** * @brief Analysis function of LUISOL - * + * * At this time, only memory allocation and initialization is done here. - * + * * @return int - 0 if successful, error code otherwise - * + * * @pre System matrix `A_` is in unsorted COO format without duplicates. - * - * @note LUSOL does not expose symbolic factorization in its API. It might + * + * @note LUSOL does not expose symbolic factorization in its API. It might * be possible refactor lu1fac into separate symbolic and numerical * factorization functions, but for now, we do the both in ::factorize(). */ @@ -156,7 +156,54 @@ namespace ReSolve w_, &inform); - // TODO: consider handling inform = 7 + if (inform == 7) { + delete[] a_; + delete[] indc_; + delete[] indr_; + + lena_ = luparm_[12]; + + a_ = new real_type[lena_]; + indc_ = new index_type[lena_]; + indr_ = new index_type[lena_]; + mem_.setZeroArrayOnHost(a_, lena_); + mem_.setZeroArrayOnHost(indc_, lena_); + mem_.setZeroArrayOnHost(indr_, lena_); + + mem_.setZeroArrayOnHost(p_, m_); + + mem_.setZeroArrayOnHost(q_, n_); + + mem_.setZeroArrayOnHost(lenc_, n_); + + mem_.setZeroArrayOnHost(lenr_, m_); + + mem_.setZeroArrayOnHost(locc_, n_); + + mem_.setZeroArrayOnHost(locr_, m_); + + mem_.setZeroArrayOnHost(iploc_, n_); + + mem_.setZeroArrayOnHost(iqloc_, m_); + + mem_.setZeroArrayOnHost(ipinv_, m_); + + mem_.setZeroArrayOnHost(iqinv_, n_); + + mem_.setZeroArrayOnHost(w_, n_); + + real_type* a_in = A_->getValues(memory::HOST); + index_type* indc_in = A_->getRowData(memory::HOST); + index_type* indr_in = A_->getColData(memory::HOST); + + for (index_type i = 0; i < nelem_; i++) { + a_[i] = a_in[i]; + indc_[i] = indc_in[i] + 1; + indr_[i] = indr_in[i] + 1; + } + + return factorize(); + } return inform; } @@ -280,7 +327,6 @@ namespace ReSolve mem_.setZeroArrayOnHost(indc_, lena_); mem_.setZeroArrayOnHost(indr_, lena_); - p_ = new index_type[m_]; mem_.setZeroArrayOnHost(p_, m_); @@ -313,7 +359,7 @@ namespace ReSolve w_ = new real_type[n_]; mem_.setZeroArrayOnHost(w_, n_); - + return 0; } @@ -333,20 +379,20 @@ namespace ReSolve delete[] ipinv_; delete[] iqinv_; delete[] w_; - a_ = nullptr; - indc_ = nullptr; - indr_ = nullptr; - p_ = nullptr; - q_ = nullptr; - lenc_ = nullptr; - lenr_ = nullptr; - locc_ = nullptr; - locr_ = nullptr; + a_ = nullptr; + indc_ = nullptr; + indr_ = nullptr; + p_ = nullptr; + q_ = nullptr; + lenc_ = nullptr; + lenr_ = nullptr; + locc_ = nullptr; + locr_ = nullptr; iploc_ = nullptr; - iqloc_ = nullptr; - ipinv_ = nullptr; + iqloc_ = nullptr; + ipinv_ = nullptr; iqinv_ = nullptr; - w_ = nullptr; + w_ = nullptr; return 0; } From c87d7049cfc3be42417bc73f3921d2b8b5c57a3f Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Thu, 11 Jul 2024 16:49:45 -0400 Subject: [PATCH 15/18] finish cleaning up the example source --- examples/r_LUSOL_LUSOL.cpp | 136 ++++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 64 deletions(-) diff --git a/examples/r_LUSOL_LUSOL.cpp b/examples/r_LUSOL_LUSOL.cpp index c55094af..3bbf2e27 100644 --- a/examples/r_LUSOL_LUSOL.cpp +++ b/examples/r_LUSOL_LUSOL.cpp @@ -6,8 +6,6 @@ #include #include -#include -#include #include #include #include @@ -61,29 +59,30 @@ int specializedMatvec(ReSolve::matrix::Coo* Ageneric, return 0; } +void usage() +{ + std::cerr << "usage: lusol_lusol.exe MATRIX_FILE RHS_FILE NUM_SYSTEMS [MATRIX_FILE_ID RHS_FILE_ID...]" << std::endl; + exit(1); +} + int main(int argc, char* argv[]) { if (argc <= 4) { - std::cerr << "usage: lusol_lusol.exe MATRIX_FILE RHS_FILE NUM_SYSTEMS [MATRIX_FILE_ID RHS_FILE_ID...]" << std::endl; - return 1; + usage(); } - std::string matrixFileName = argv[1]; - std::string rhsFileName = argv[2]; - - index_type numSystems = atoi(argv[3]); - if (numSystems == 0) { + index_type n_systems = atoi(argv[3]); + if (n_systems == 0) { return 0; } - std::cout << "Family mtx file name: " << matrixFileName << ", total number of matrices: " << numSystems << std::endl; - std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl; + if (argc % 2 != 0 || (argc - 4) / 2 < n_systems) { + usage(); + } - std::string fileId; - std::string rhsId; - std::string matrixFileNameFull; - std::string rhsFileNameFull; + std::string matrix_file_prefix = argv[1]; + std::string rhs_file_prefix = argv[2]; std::unique_ptr workspace(new ReSolve::LinAlgWorkspaceCpu()); std::unique_ptr matrix_handler(new ReSolve::MatrixHandler(workspace.get())); @@ -92,89 +91,98 @@ int main(int argc, char* argv[]) std::unique_ptr lusol(new ReSolve::LinSolverDirectLUSOL); - ReSolve::matrix::Coo* A; + // NOTE: this has to be manually managed because of the way readAndUpdateRhs takes its arguments. + // fixing this would require the second parameter to be a reference to a pointer and not a + // pointer to a pointer real_type* rhs; - vector_type *vec_rhs, *vec_r, *vec_x; + std::unique_ptr A; + std::unique_ptr vec_rhs, vec_r, vec_x; - for (int i = 0; i < numSystems; ++i) { - index_type j = 4 + i * 2; - fileId = argv[j]; - rhsId = argv[j + 1]; + for (int system = 0; system < n_systems; system++) { - matrixFileNameFull = ""; - rhsFileNameFull = ""; + std::string matrix_file_path = matrix_file_prefix + argv[system + 4] + ".mtx"; + std::ifstream matrix_file(matrix_file_path); + if (!matrix_file.is_open()) { + std::cout << "Failed to open file " << matrix_file_path << "\n"; + return 1; + } - matrixFileNameFull = matrixFileName + fileId + ".mtx"; - rhsFileNameFull = rhsFileName + rhsId + ".mtx"; - std::cout << "Reading: " << matrixFileNameFull << std::endl; + std::cout << "Matrix file: " << matrix_file_path << std::endl; - std::ifstream mat_file(matrixFileNameFull); - if (!mat_file.is_open()) { - std::cout << "Failed to open file " << matrixFileNameFull << "\n"; - return -1; - } - std::ifstream rhs_file(rhsFileNameFull); + std::string rhs_file_path = rhs_file_prefix + argv[system + 5] + ".mtx"; + std::ifstream rhs_file(rhs_file_path); if (!rhs_file.is_open()) { - std::cout << "Failed to open file " << rhsFileNameFull << "\n"; - return -1; + std::cout << "Failed to open file " << rhs_file_path << "\n"; + return 1; } - if (i == 0) { - A = ReSolve::io::readMatrixFromFile(mat_file); + std::cout << "RHS file: " << rhs_file_path << std::endl; + + if (system == 0) { + A = std::unique_ptr(ReSolve::io::readMatrixFromFile(matrix_file)); rhs = ReSolve::io::readRhsFromFile(rhs_file); - vec_rhs = new vector_type(A->getNumRows()); - vec_r = new vector_type(A->getNumRows()); + vec_rhs = std::unique_ptr(new vector_type(A->getNumRows())); + vec_r = std::unique_ptr(new vector_type(A->getNumRows())); - vec_x = new vector_type(A->getNumRows()); + vec_x = std::unique_ptr(new vector_type(A->getNumRows())); vec_x->allocate(ReSolve::memory::HOST); } else { - ReSolve::io::readAndUpdateMatrix(mat_file, A); + ReSolve::io::readAndUpdateMatrix(matrix_file, A.get()); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } + + matrix_file.close(); + rhs_file.close(); + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() << ", nnz: " << A->getNnz() << ", symmetric? " << A->symmetric() << ", Expanded? " << A->expanded() << std::endl; - mat_file.close(); - rhs_file.close(); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - ReSolve::matrix::expand(*A); + + ReSolve::matrix::expand(*A.get()); std::cout << "Matrix expansion completed. Expanded NNZ: " << A->getNnzExpanded() << std::endl; - // Now call direct solver - int status; - - lusol->setup(A); - status = lusol->analyze(); - std::cout << "LUSOL analysis status: " << status << std::endl; - status = lusol->factorize(); - std::cout << "LUSOL factorization status: " << status << std::endl; - status = lusol->solve(vec_rhs, vec_x); - std::cout << "LUSOL solve status: " << status << std::endl; + + if (lusol->setup(A.get()) != 0) { + std::cout << "setup failed on matrix " << system << "/" << n_systems << std::endl; + return 1; + } + + if (lusol->analyze() != 0) { + std::cout << "analysis failed on matrix " << system << "/" << n_systems << std::endl; + return 1; + } + + if (lusol->factorize() != 0) { + std::cout << "factorization failed on matrix " << system << "/" << n_systems << std::endl; + return 1; + } + + if (lusol->solve(vec_rhs.get(), vec_x.get()) != 0) { + std::cout << "solving failed on matrix " << system << "/" << n_systems << std::endl; + return 1; + } + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); - specializedMatvec(A, vec_x, vec_r, &ONE, &MINUSONE); - norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::HOST); + specializedMatvec(A.get(), vec_x.get(), vec_r.get(), &ONE, &MINUSONE); + norm_r = vector_handler->infNorm(vec_r.get(), ReSolve::memory::HOST); std::cout << "\t2-Norm of the residual: " << std::scientific << std::setprecision(16) - << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)) << "\n"; - matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::HOST); - norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::HOST); - std::cout << "\tMatrix inf norm: " << std::scientific << std::setprecision(16) << norm_A << "\n" + << sqrt(vector_handler->dot(vec_r.get(), vec_r.get(), ReSolve::memory::HOST)) << "\n"; + matrix_handler->matrixInfNorm(A.get(), &norm_A, ReSolve::memory::HOST); + norm_x = vector_handler->infNorm(vec_x.get(), ReSolve::memory::HOST); + std::cout << "\tMatrix inf norm: " << std::scientific << std::setprecision(16) << norm_A << "\n" << "\tResidual inf norm: " << norm_r << "\n" << "\tSolution inf norm: " << norm_x << "\n" << "\tNorm of scaled residuals: " << norm_r / (norm_A * norm_x) << "\n"; } - delete A; delete[] rhs; - delete vec_r; - delete vec_x; - delete vec_rhs; - return 0; } From 3c33ef253bbbfcc7f65d3306dcaac616cb876a57 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Wed, 24 Jul 2024 10:52:39 -0400 Subject: [PATCH 16/18] update a path with the coo2csr-take-2 version --- resolve/LinSolverDirectLUSOL.cpp | 88 ++++++++------------------------ 1 file changed, 21 insertions(+), 67 deletions(-) diff --git a/resolve/LinSolverDirectLUSOL.cpp b/resolve/LinSolverDirectLUSOL.cpp index f2064ad8..a8c579d3 100644 --- a/resolve/LinSolverDirectLUSOL.cpp +++ b/resolve/LinSolverDirectLUSOL.cpp @@ -1,7 +1,7 @@ -#include #include #include #include +#include #include "LinSolverDirectLUSOL.hpp" #include "lusol/lusol.hpp" @@ -85,14 +85,14 @@ namespace ReSolve /** * @brief Analysis function of LUISOL - * + * * At this time, only memory allocation and initialization is done here. - * + * * @return int - 0 if successful, error code otherwise - * + * * @pre System matrix `A_` is in unsorted COO format without duplicates. - * - * @note LUSOL does not expose symbolic factorization in its API. It might + * + * @note LUSOL does not expose symbolic factorization in its API. It might * be possible refactor lu1fac into separate symbolic and numerical * factorization functions, but for now, we do the both in ::factorize(). */ @@ -156,54 +156,7 @@ namespace ReSolve w_, &inform); - if (inform == 7) { - delete[] a_; - delete[] indc_; - delete[] indr_; - - lena_ = luparm_[12]; - - a_ = new real_type[lena_]; - indc_ = new index_type[lena_]; - indr_ = new index_type[lena_]; - mem_.setZeroArrayOnHost(a_, lena_); - mem_.setZeroArrayOnHost(indc_, lena_); - mem_.setZeroArrayOnHost(indr_, lena_); - - mem_.setZeroArrayOnHost(p_, m_); - - mem_.setZeroArrayOnHost(q_, n_); - - mem_.setZeroArrayOnHost(lenc_, n_); - - mem_.setZeroArrayOnHost(lenr_, m_); - - mem_.setZeroArrayOnHost(locc_, n_); - - mem_.setZeroArrayOnHost(locr_, m_); - - mem_.setZeroArrayOnHost(iploc_, n_); - - mem_.setZeroArrayOnHost(iqloc_, m_); - - mem_.setZeroArrayOnHost(ipinv_, m_); - - mem_.setZeroArrayOnHost(iqinv_, n_); - - mem_.setZeroArrayOnHost(w_, n_); - - real_type* a_in = A_->getValues(memory::HOST); - index_type* indc_in = A_->getRowData(memory::HOST); - index_type* indr_in = A_->getColData(memory::HOST); - - for (index_type i = 0; i < nelem_; i++) { - a_[i] = a_in[i]; - indc_[i] = indc_in[i] + 1; - indr_[i] = indr_in[i] + 1; - } - - return factorize(); - } + // TODO: consider handling inform = 7 return inform; } @@ -327,6 +280,7 @@ namespace ReSolve mem_.setZeroArrayOnHost(indc_, lena_); mem_.setZeroArrayOnHost(indr_, lena_); + p_ = new index_type[m_]; mem_.setZeroArrayOnHost(p_, m_); @@ -359,7 +313,7 @@ namespace ReSolve w_ = new real_type[n_]; mem_.setZeroArrayOnHost(w_, n_); - + return 0; } @@ -379,20 +333,20 @@ namespace ReSolve delete[] ipinv_; delete[] iqinv_; delete[] w_; - a_ = nullptr; - indc_ = nullptr; - indr_ = nullptr; - p_ = nullptr; - q_ = nullptr; - lenc_ = nullptr; - lenr_ = nullptr; - locc_ = nullptr; - locr_ = nullptr; + a_ = nullptr; + indc_ = nullptr; + indr_ = nullptr; + p_ = nullptr; + q_ = nullptr; + lenc_ = nullptr; + lenr_ = nullptr; + locc_ = nullptr; + locr_ = nullptr; iploc_ = nullptr; - iqloc_ = nullptr; - ipinv_ = nullptr; + iqloc_ = nullptr; + ipinv_ = nullptr; iqinv_ = nullptr; - w_ = nullptr; + w_ = nullptr; return 0; } From 10e04540f075a0d8f5a17da48ee28463f65643b4 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Wed, 24 Jul 2024 11:32:48 -0400 Subject: [PATCH 17/18] adjusted example to work with coo2coo --- examples/r_LUSOL_LUSOL.cpp | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/examples/r_LUSOL_LUSOL.cpp b/examples/r_LUSOL_LUSOL.cpp index 3bbf2e27..dbfb0ba0 100644 --- a/examples/r_LUSOL_LUSOL.cpp +++ b/examples/r_LUSOL_LUSOL.cpp @@ -95,7 +95,7 @@ int main(int argc, char* argv[]) // fixing this would require the second parameter to be a reference to a pointer and not a // pointer to a pointer real_type* rhs; - std::unique_ptr A; + std::unique_ptr A_unexpanded, A; std::unique_ptr vec_rhs, vec_r, vec_x; for (int system = 0; system < n_systems; system++) { @@ -118,50 +118,53 @@ int main(int argc, char* argv[]) std::cout << "RHS file: " << rhs_file_path << std::endl; if (system == 0) { - A = std::unique_ptr(ReSolve::io::readMatrixFromFile(matrix_file)); + A_unexpanded = std::unique_ptr(ReSolve::io::readMatrixFromFile(matrix_file)); + A = std::unique_ptr(new ReSolve::matrix::Coo(A_unexpanded->getNumRows(), + A_unexpanded->getNumColumns(), + 0)); rhs = ReSolve::io::readRhsFromFile(rhs_file); - vec_rhs = std::unique_ptr(new vector_type(A->getNumRows())); - vec_r = std::unique_ptr(new vector_type(A->getNumRows())); + vec_rhs = std::unique_ptr(new vector_type(A_unexpanded->getNumRows())); + vec_r = std::unique_ptr(new vector_type(A_unexpanded->getNumRows())); - vec_x = std::unique_ptr(new vector_type(A->getNumRows())); + vec_x = std::unique_ptr(new vector_type(A_unexpanded->getNumRows())); vec_x->allocate(ReSolve::memory::HOST); } else { - ReSolve::io::readAndUpdateMatrix(matrix_file, A.get()); + ReSolve::io::readAndUpdateMatrix(matrix_file, A_unexpanded.get()); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } matrix_file.close(); rhs_file.close(); - std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() - << " x " << A->getNumColumns() - << ", nnz: " << A->getNnz() - << ", symmetric? " << A->symmetric() - << ", Expanded? " << A->expanded() << std::endl; + std::cout << "Finished reading the matrix and rhs, size: " << A_unexpanded->getNumRows() + << " x " << A_unexpanded->getNumColumns() + << ", nnz: " << A_unexpanded->getNnz() + << ", symmetric? " << A_unexpanded->symmetric() + << ", Expanded? " << A_unexpanded->expanded() << std::endl; vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); - ReSolve::matrix::expand(*A.get()); + ReSolve::matrix::coo2coo(A_unexpanded.get(), A.get(), ReSolve::memory::HOST); std::cout << "Matrix expansion completed. Expanded NNZ: " << A->getNnzExpanded() << std::endl; if (lusol->setup(A.get()) != 0) { - std::cout << "setup failed on matrix " << system << "/" << n_systems << std::endl; + std::cout << "setup failed on matrix " << system + 1 << "/" << n_systems << std::endl; return 1; } if (lusol->analyze() != 0) { - std::cout << "analysis failed on matrix " << system << "/" << n_systems << std::endl; + std::cout << "analysis failed on matrix " << system + 1 << "/" << n_systems << std::endl; return 1; } if (lusol->factorize() != 0) { - std::cout << "factorization failed on matrix " << system << "/" << n_systems << std::endl; + std::cout << "factorization failed on matrix " << system + 1 << "/" << n_systems << std::endl; return 1; } if (lusol->solve(vec_rhs.get(), vec_x.get()) != 0) { - std::cout << "solving failed on matrix " << system << "/" << n_systems << std::endl; + std::cout << "solving failed on matrix " << system + 1 << "/" << n_systems << std::endl; return 1; } From bfba9f4bb472fe021475713e00d83d1fbec083f0 Mon Sep 17 00:00:00 2001 From: superwhiskers Date: Thu, 25 Jul 2024 11:32:08 -0400 Subject: [PATCH 18/18] fix argument interpretation --- examples/r_LUSOL_LUSOL.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/r_LUSOL_LUSOL.cpp b/examples/r_LUSOL_LUSOL.cpp index dbfb0ba0..68126ec7 100644 --- a/examples/r_LUSOL_LUSOL.cpp +++ b/examples/r_LUSOL_LUSOL.cpp @@ -100,7 +100,7 @@ int main(int argc, char* argv[]) for (int system = 0; system < n_systems; system++) { - std::string matrix_file_path = matrix_file_prefix + argv[system + 4] + ".mtx"; + std::string matrix_file_path = matrix_file_prefix + argv[(2 * system) + 4] + ".mtx"; std::ifstream matrix_file(matrix_file_path); if (!matrix_file.is_open()) { std::cout << "Failed to open file " << matrix_file_path << "\n"; @@ -109,7 +109,7 @@ int main(int argc, char* argv[]) std::cout << "Matrix file: " << matrix_file_path << std::endl; - std::string rhs_file_path = rhs_file_prefix + argv[system + 5] + ".mtx"; + std::string rhs_file_path = rhs_file_prefix + argv[(2 * system) + 5] + ".mtx"; std::ifstream rhs_file(rhs_file_path); if (!rhs_file.is_open()) { std::cout << "Failed to open file " << rhs_file_path << "\n";