diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index ad38dd5d..6a92975e 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -15,6 +15,7 @@ set(Matrix_SRC Coo.cpp MatrixHandler.cpp MatrixHandlerCpu.cpp + Utilities.cpp ) # C++ code that depends on CUDA SDK libraries @@ -35,6 +36,7 @@ set(Matrix_HEADER_INSTALL Csr.hpp Csc.hpp MatrixHandler.hpp + Utilities.hpp ) # Build shared library ReSolve::matrix diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index 325bb76b..4735e68a 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -7,6 +7,7 @@ #include "Coo.hpp" #include #include +#include namespace ReSolve { @@ -35,7 +36,7 @@ namespace ReSolve A_coo->symmetric(), A_coo->expanded()) { - coo2csr(A_coo, memspace); + matrix::coo2csr(A_coo, this, memspace); } /** @@ -374,149 +375,10 @@ namespace ReSolve assert(nnz_ == A_coo->getNnz()); assert(is_symmetric_ == A_coo->symmetric()); // <- Do we need to check for this? - return coo2csr(A_coo, memspaceOut); + return matrix::coo2csr(A_coo, this, memspaceOut); } - int matrix::Csr::coo2csr(matrix::Coo* A_coo, memory::MemorySpace memspace) - { - //count nnzs first - index_type nnz_unpacked = 0; - index_type nnz = A_coo->getNnz(); - index_type n = A_coo->getNumRows(); - bool symmetric = A_coo->symmetric(); - bool expanded = A_coo->expanded(); - - index_type* nnz_counts = new index_type[n]; - std::fill_n(nnz_counts, n, 0); - index_type* coo_rows = A_coo->getRowData(memory::HOST); - index_type* coo_cols = A_coo->getColData(memory::HOST); - real_type* coo_vals = A_coo->getValues( memory::HOST); - - index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal - std::fill_n(diag_control, n, 0); - index_type nnz_unpacked_no_duplicates = 0; - index_type nnz_no_duplicates = nnz; - - - //maybe check if they exist? - for (index_type i = 0; i < nnz; ++i) - { - nnz_counts[coo_rows[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) - { - nnz_counts[coo_cols[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - } - if (coo_rows[i] == coo_cols[i]){ - if (diag_control[coo_rows[i]] > 0) { - //duplicate - nnz_unpacked_no_duplicates--; - nnz_no_duplicates--; - } - diag_control[coo_rows[i]]++; - } - } - this->setExpanded(true); - this->setNnzExpanded(nnz_unpacked_no_duplicates); - index_type* csr_ia = new index_type[n+1]; - std::fill_n(csr_ia, n + 1, 0); - index_type* csr_ja = new index_type[nnz_unpacked]; - real_type* csr_a = new real_type[nnz_unpacked]; - index_type* nnz_shifts = new index_type[n]; - std::fill_n(nnz_shifts, n , 0); - - IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - - csr_ia[0] = 0; - - for (index_type i = 1; i < n + 1; ++i){ - csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); - } - - int r, start; - - - for (index_type i = 0; i < nnz; ++i){ - //which row - r = coo_rows[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) { - out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - } - if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates - bool already_there = false; - for (index_type j = start; j < start + nnz_shifts[r]; ++j) - { - index_type c = tmp[j].getIdx(); - if (c == r) { - real_type val = tmp[j].getValue(); - val += coo_vals[i]; - tmp[j].setValue(val); - already_there = true; - out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; - } - } - if (!already_there){ // first time this duplicates appears - - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - - nnz_shifts[r]++; - } - } else {//not diagonal - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - - if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) - { - r = coo_cols[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) - out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - } - } - } - //now sort whatever is inside rows - - for (int i = 0; i < n; ++i) - { - //now sorting (and adding 1) - int colStart = csr_ia[i]; - int colEnd = csr_ia[i + 1]; - int length = colEnd - colStart; - std::sort(&tmp[colStart],&tmp[colStart] + length); - } - - for (index_type i = 0; i < nnz_unpacked; ++i) - { - csr_ja[i] = tmp[i].getIdx(); - csr_a[i] = tmp[i].getValue(); - } - - this->setNnz(nnz_no_duplicates); - this->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); - - delete [] nnz_counts; - delete [] tmp; - delete [] nnz_shifts; - delete [] csr_ia; - delete [] csr_ja; - delete [] csr_a; - delete [] diag_control; - - return 0; - } - /** * @brief Prints matrix data. * diff --git a/resolve/matrix/Csr.hpp b/resolve/matrix/Csr.hpp index 7841a7fb..a3896a1d 100644 --- a/resolve/matrix/Csr.hpp +++ b/resolve/matrix/Csr.hpp @@ -49,8 +49,6 @@ namespace ReSolve { namespace matrix { int updateFromCoo(matrix::Coo* mat, memory::MemorySpace memspaceOut); - private: - int coo2csr(matrix::Coo* mat, memory::MemorySpace memspace); }; }} // namespace ReSolve::matrix diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 3fc9a492..b61a93b5 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include "MatrixHandler.hpp" #include "MatrixHandlerCpu.hpp" @@ -116,150 +116,7 @@ namespace ReSolve { */ int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) { - //count nnzs first - index_type nnz_unpacked = 0; - index_type nnz = A_coo->getNnz(); - index_type n = A_coo->getNumRows(); - bool symmetric = A_coo->symmetric(); - bool expanded = A_coo->expanded(); - - index_type* nnz_counts = new index_type[n]; - std::fill_n(nnz_counts, n, 0); - index_type* coo_rows = A_coo->getRowData(memory::HOST); - index_type* coo_cols = A_coo->getColData(memory::HOST); - real_type* coo_vals = A_coo->getValues( memory::HOST); - - index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal - std::fill_n(diag_control, n, 0); - index_type nnz_unpacked_no_duplicates = 0; - index_type nnz_no_duplicates = nnz; - - - //maybe check if they exist? - for (index_type i = 0; i < nnz; ++i) - { - nnz_counts[coo_rows[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) - { - nnz_counts[coo_cols[i]]++; - nnz_unpacked++; - nnz_unpacked_no_duplicates++; - } - if (coo_rows[i] == coo_cols[i]){ - if (diag_control[coo_rows[i]] > 0) { - //duplicate - nnz_unpacked_no_duplicates--; - nnz_no_duplicates--; - } - diag_control[coo_rows[i]]++; - } - } - A_csr->setExpanded(true); - A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); - index_type* csr_ia = new index_type[n+1]; - std::fill_n(csr_ia, n + 1, 0); - index_type* csr_ja = new index_type[nnz_unpacked]; - real_type* csr_a = new real_type[nnz_unpacked]; - index_type* nnz_shifts = new index_type[n]; - std::fill_n(nnz_shifts, n , 0); - - IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - - csr_ia[0] = 0; - - for (index_type i = 1; i < n + 1; ++i){ - csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); - } - - int r, start; - - - for (index_type i = 0; i < nnz; ++i){ - //which row - r = coo_rows[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) { - out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - } - if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates - bool already_there = false; - for (index_type j = start; j < start + nnz_shifts[r]; ++j) - { - index_type c = tmp[j].getIdx(); - if (c == r) { - real_type val = tmp[j].getValue(); - val += coo_vals[i]; - tmp[j].setValue(val); - already_there = true; - out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; - } - } - if (!already_there){ // first time this duplicates appears - - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - - nnz_shifts[r]++; - } - } else {//not diagonal - tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - - if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) - { - r = coo_cols[i]; - start = csr_ia[r]; - - if ((start + nnz_shifts[r]) > nnz_unpacked) - out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; - tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); - tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - nnz_shifts[r]++; - } - } - } - //now sort whatever is inside rows - - for (int i = 0; i < n; ++i) - { - - //now sorting (and adding 1) - int colStart = csr_ia[i]; - int colEnd = csr_ia[i + 1]; - int length = colEnd - colStart; - std::sort(&tmp[colStart],&tmp[colStart] + length); - } - - for (index_type i = 0; i < nnz_unpacked; ++i) - { - csr_ja[i] = tmp[i].getIdx(); - csr_a[i] = tmp[i].getValue(); - } -#if 0 - for (int i = 0; isetNnz(nnz_no_duplicates); - A_csr->updateData(csr_ia, csr_ja, csr_a, memory::HOST, memspace); - - delete [] nnz_counts; - delete [] tmp; - delete [] nnz_shifts; - delete [] csr_ia; - delete [] csr_ja; - delete [] csr_a; - delete [] diag_control; - - return 0; + return matrix::coo2csr(A_coo, A_csr, memspace); } /** diff --git a/resolve/matrix/Utilities.cpp b/resolve/matrix/Utilities.cpp new file mode 100644 index 00000000..9e60034a --- /dev/null +++ b/resolve/matrix/Utilities.cpp @@ -0,0 +1,412 @@ +#include +#include +#include + +#include "Utilities.hpp" + +#include +#include +#include +#include +#include + +namespace ReSolve +{ + using out = io::Logger; + namespace matrix + { + /** + * @param[in] A + * @param[out] B + * @param[in] memspace + * @return int - Error code, 0 if successful + * + * @pre `A` is a valid sparse matrix in an assumed to be unordered COO format. + * Duplicates are permitted, and the values and indices on the host must + * be up to date + * + * @post `B` is semantically the same matrix as `A`, but has been deduplicated, + * has been expanded if `A` was symmetric and either upper or lower + * triangular, and has had its triplets sorted in a "column-major" manner + * + * @invariant `A` is unchanged + */ + int coo2coo(matrix::Coo* A, matrix::Coo* B, memory::MemorySpace memspace) + { + index_type* a_rows = A->getRowData(memory::HOST); + index_type* a_columns = A->getColData(memory::HOST); + real_type* a_values = A->getValues(memory::HOST); + + if (a_rows == nullptr || a_columns == nullptr || a_values == nullptr) { + return 0; + } + + index_type nnz_with_duplicates = A->getNnz(); + index_type n_columns = A->getNumColumns(); + + // NOTE: auxiliary memory that is first used to track the amount + // of off-diagonal elements to perform space allocation when + // the matrix is symmetric and unexpanded, then is used to + // track the amount of space used in each column to allow + // out of order filling of values + std::unique_ptr used(new index_type[n_columns]); + std::fill_n(used.get(), n_columns, 0); + + // NOTE: column partitions of the destination matrices, used + // to allow out of order filling of values (as is done + // during expansion of the input matrix) + std::unique_ptr partitions(new index_type[n_columns + 1]); + std::fill_n(partitions.get(), n_columns + 1, 0); + + // allocation stage, the first loop is O(nnz) always and the second is O(n) + // + // computes the nnz of the destination matrix and allocates space for each + // column. additionally, the upper or lower diagonality is checked if the + // input matrix is symmetric and unexpanded + + if (!A->symmetric() || A->expanded()) { + for (index_type i = 0; i < nnz_with_duplicates; i++) { + partitions[a_columns[i] + 1]++; + } + } else { + bool is_upper_triangular = false; + bool is_lower_triangular = false; + + for (index_type i = 0; i < nnz_with_duplicates; i++) { + partitions[a_columns[i] + 1]++; + + if (a_rows[i] != a_columns[i]) { + used[a_rows[i]]++; + + is_upper_triangular |= a_rows[i] < a_columns[i]; + is_lower_triangular |= a_rows[i] > a_columns[i]; + if (is_upper_triangular && is_lower_triangular) { + assert(false && "a matrix indicated to be symmetric triangular was not actually symmetric triangular"); + return -1; + } + } + } + } + + for (index_type column = 0; column < n_columns; column++) { + partitions[column + 1] += partitions[column] + used[column]; + used[column] = 0; + } + + index_type new_nnz_with_duplicates = partitions[n_columns]; + index_type* b_rows = new index_type[new_nnz_with_duplicates]; + std::fill_n(b_rows, new_nnz_with_duplicates, -1); + index_type* b_columns = new index_type[new_nnz_with_duplicates]; + real_type* b_values = new real_type[new_nnz_with_duplicates]; + + // fill stage, approximately O(nnz * n) in the worst case + // + // all this does is iterate over the nonzeroes in the input matrix, + // check to see if a value at that column already exists using binary search, + // and if it does, then add to the value at that position, deduplicating the + // matrix. otherwise, it allocates a new spot in the row (where you see + // used[a_rows[i]]++) and shifts everything over, performing what is + // effectively insertion sort. the lower half is conditioned on the matrix + // being symmetric and stored as either upper-triangular or lower-triangular, + // and just performs the same as what is described above, but with the + // indices swapped. + + for (index_type i = 0; i < nnz_with_duplicates; i++) { + // this points to the first element not less than `a_rows[i]`. see + // https://en.cppreference.com/w/cpp/algorithm/lower_bound for more details + index_type* closest_position = + std::lower_bound(&b_rows[partitions[a_columns[i]]], + &b_rows[partitions[a_columns[i]] + used[a_columns[i]]], + a_rows[i]); + + // this is the offset at which the element's value belongs + index_type insertion_offset = static_cast(closest_position - b_rows); + + if (b_rows[insertion_offset] == a_rows[i]) { + b_values[insertion_offset] += a_values[i]; + } else { + for (index_type offset = partitions[a_columns[i]] + used[a_columns[i]]++; + offset > insertion_offset; + offset--) { + std::swap(b_rows[offset], b_rows[offset - 1]); + std::swap(b_columns[offset], b_columns[offset - 1]); + std::swap(b_values[offset], b_values[offset - 1]); + } + + b_rows[insertion_offset] = a_rows[i]; + b_columns[insertion_offset] = a_columns[i]; + b_values[insertion_offset] = a_values[i]; + } + + if ((A->symmetric() && !A->expanded()) && (a_columns[i] != a_rows[i])) { + index_type* mirrored_closest_position = + std::lower_bound(&b_rows[partitions[a_rows[i]]], + &b_rows[partitions[a_rows[i]] + used[a_rows[i]]], + a_columns[i]); + index_type mirrored_insertion_offset = static_cast(mirrored_closest_position - b_rows); + + if (b_rows[mirrored_insertion_offset] == a_columns[i]) { + b_values[mirrored_insertion_offset] += a_values[i]; + } else { + for (index_type offset = partitions[a_rows[i]] + used[a_rows[i]]++; + offset > mirrored_insertion_offset; + offset--) { + std::swap(b_rows[offset], b_rows[offset - 1]); + std::swap(b_columns[offset], b_columns[offset - 1]); + std::swap(b_values[offset], b_values[offset - 1]); + } + + b_rows[mirrored_insertion_offset] = a_columns[i]; + b_columns[mirrored_insertion_offset] = a_rows[i]; + b_values[mirrored_insertion_offset] = a_values[i]; + } + } + } + + // backshifting stage, approximately O(nnz * m) in the worst case + // + // all this does is shift back rows to remove empty space in between them + // by indexing each row in order, checking to see if the amount of used + // spaces is equivalent to the amount of nonzeroes in the row, and if not, + // shifts every subsequent row back the amount of unused spaces + + for (index_type column = 0; column < n_columns; column++) { + index_type column_nnz = partitions[column + 1] - partitions[column]; + if (used[column] != column_nnz) { + index_type correction = column_nnz - used[column]; + + for (index_type corrected_column = column + 1; + corrected_column < n_columns; + corrected_column++) { + for (index_type offset = partitions[corrected_column]; + offset < partitions[corrected_column + 1]; + offset++) { + b_rows[offset - correction] = b_rows[offset]; + b_columns[offset - correction] = b_columns[offset]; + b_values[offset - correction] = b_values[offset]; + } + + partitions[corrected_column] -= correction; + } + + partitions[n_columns] -= correction; + } + } + + index_type new_nnz_without_duplicates = partitions[n_columns]; + B->setSymmetric(A->symmetric()); + B->setNnz(new_nnz_without_duplicates); + // NOTE: see the comment in coo2csr for why this is necessary + B->setNnzExpanded(new_nnz_without_duplicates); + B->setExpanded(true); + + if (B->updateData(b_rows, b_columns, b_values, memory::HOST, memspace) != 0) { + delete[] b_rows; + delete[] b_columns; + delete[] b_values; + + assert(false && "invalid state after coo -> coo conversion"); + return -1; + } + + delete[] b_rows; + delete[] b_columns; + delete[] b_values; + + return 0; + } + + /** + * @param[in] A_coo + * @param[out] A_csr + * @param[in] memspace + * @return int - Error code, 0 if successful. + * + * @pre `A_coo` is a valid sparse matrix in unordered COO format. Duplicates are + * allowed. Up-to-date values and indices must be on the host. + * + * @post `A_csr` represents the same matrix as `A_coo` but is in the CSR format. + * `A_csr` is allocated and stored on the host. + * + * @invariant `A_coo` is not changed. + */ + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace) + { + index_type* coo_rows = A_coo->getRowData(memory::HOST); + index_type* coo_columns = A_coo->getColData(memory::HOST); + real_type* coo_values = A_coo->getValues(memory::HOST); + + if (coo_rows == nullptr || coo_columns == nullptr || coo_values == nullptr) { + return 0; + } + + index_type nnz_with_duplicates = A_coo->getNnz(); + index_type n_rows = A_coo->getNumRows(); + index_type* csr_rows = new index_type[n_rows + 1]; + std::fill_n(csr_rows, n_rows + 1, 0); + + // NOTE: this is the only auxiliary storage buffer used by this conversion + // function. it is first used to track the number of values off the + // diagonal (if the matrix is symmetric and unexpanded), then it is + // used to track the amount of elements present within each row while + // the rows are being filled. this is later used during the backshifting + // step, in which the excess space is compacted so that there is no + // left over space between each row + std::unique_ptr used(new index_type[n_rows]); + std::fill_n(used.get(), n_rows, 0); + + // allocation stage, the first loop is O(nnz) no matter the branch and the second + // is O(n) + // + // all this does is prepare the row index array based on the nonzeroes stored in + // the input coo matrix. if the matrix is symmetric and either upper or lower + // triangular, it additionally corrects for mirrored values using the `used` array + // and validates the triangularity. the branch is done to avoid the extra work if + // it's not necessary + + if (!A_coo->symmetric() || A_coo->expanded()) { + for (index_type i = 0; i < nnz_with_duplicates; i++) { + csr_rows[coo_rows[i] + 1]++; + } + } else { + bool is_upper_triangular = false; + bool is_lower_triangular = false; + for (index_type i = 0; i < nnz_with_duplicates; i++) { + csr_rows[coo_rows[i] + 1]++; + if (coo_rows[i] != coo_columns[i]) { + used[coo_columns[i]]++; + + is_upper_triangular |= coo_rows[i] < coo_columns[i]; + is_lower_triangular |= coo_rows[i] > coo_columns[i]; + if (is_upper_triangular && is_lower_triangular) { + assert(false && "a matrix indicated to be symmetric triangular was not actually symmetric triangular"); + return -1; + } + } + } + } + + for (index_type row = 0; row < n_rows; row++) { + csr_rows[row + 1] += csr_rows[row] + used[row]; + used[row] = 0; + } + + index_type new_nnz_with_duplicates = csr_rows[n_rows]; + index_type* csr_columns = new index_type[new_nnz_with_duplicates]; + std::fill_n(csr_columns, new_nnz_with_duplicates, -1); + real_type* csr_values = new real_type[new_nnz_with_duplicates]; + + // fill stage, approximately O(nnz * m) in the worst case + // + // all this does is iterate over the nonzeroes in the coo matrix, + // check to see if a value at that column already exists using binary search, + // and if it does, then add to the value at that position, deduplicating + // the matrix. otherwise, it allocates a new spot in the row (where you see + // used[coo_rows[i]]++) and shifts everything over, performing what is + // effectively insertion sort. the lower half is conditioned on the matrix + // being symmetric and stored as either upper-triangular or lower-triangular, + // and just performs the same as what is described above, but with the + // indices swapped. + + for (index_type i = 0; i < nnz_with_duplicates; i++) { + // this points to the first element not less than `coo_columns[i]`. see + // https://en.cppreference.com/w/cpp/algorithm/lower_bound for more details + index_type* closest_position = + std::lower_bound(&csr_columns[csr_rows[coo_rows[i]]], + &csr_columns[csr_rows[coo_rows[i]] + used[coo_rows[i]]], + coo_columns[i]); + + // this is the offset at which the element's value belongs + index_type insertion_offset = static_cast(closest_position - csr_columns); + + if (csr_columns[insertion_offset] == coo_columns[i]) { + csr_values[insertion_offset] += coo_values[i]; + } else { + for (index_type offset = csr_rows[coo_rows[i]] + used[coo_rows[i]]++; + offset > insertion_offset; + offset--) { + std::swap(csr_columns[offset], csr_columns[offset - 1]); + std::swap(csr_values[offset], csr_values[offset - 1]); + } + + csr_columns[insertion_offset] = coo_columns[i]; + csr_values[insertion_offset] = coo_values[i]; + } + + if ((A_coo->symmetric() && !A_coo->expanded()) && (coo_columns[i] != coo_rows[i])) { + index_type* mirrored_closest_position = + std::lower_bound(&csr_columns[csr_rows[coo_columns[i]]], + &csr_columns[csr_rows[coo_columns[i]] + used[coo_columns[i]]], + coo_rows[i]); + index_type mirrored_insertion_offset = static_cast(mirrored_closest_position - csr_columns); + + if (csr_columns[mirrored_insertion_offset] == coo_rows[i]) { + csr_values[mirrored_insertion_offset] += coo_values[i]; + } else { + for (index_type offset = csr_rows[coo_columns[i]] + used[coo_columns[i]]++; + offset > mirrored_insertion_offset; + offset--) { + std::swap(csr_columns[offset], csr_columns[offset - 1]); + std::swap(csr_values[offset], csr_values[offset - 1]); + } + + csr_columns[mirrored_insertion_offset] = coo_rows[i]; + csr_values[mirrored_insertion_offset] = coo_values[i]; + } + } + } + + // backshifting stage, approximately O(nnz * m) in the worst case + // + // all this does is shift back rows to remove empty space in between them + // by indexing each row in order, checking to see if the amount of used + // spaces is equivalent to the amount of nonzeroes in the row, and if not, + // shifts every subsequent row back the amount of unused spaces + + for (index_type row = 0; row < n_rows; row++) { + index_type row_nnz = csr_rows[row + 1] - csr_rows[row]; + if (used[row] != row_nnz) { + index_type correction = row_nnz - used[row]; + + for (index_type corrected_row = row + 1; + corrected_row < n_rows; + corrected_row++) { + for (index_type offset = csr_rows[corrected_row]; + offset < csr_rows[corrected_row + 1]; + offset++) { + csr_columns[offset - correction] = csr_columns[offset]; + csr_values[offset - correction] = csr_values[offset]; + } + + csr_rows[corrected_row] -= correction; + } + + csr_rows[n_rows] -= correction; + } + } + + index_type new_nnz_without_duplicates = csr_rows[n_rows]; + A_csr->setSymmetric(A_coo->symmetric()); + A_csr->setNnz(new_nnz_without_duplicates); + // NOTE: this is necessary because updateData always reads the current nnz from + // this field. see #176 + A_csr->setNnzExpanded(new_nnz_without_duplicates); + A_csr->setExpanded(true); + + if (A_csr->updateData(csr_rows, csr_columns, csr_values, memory::HOST, memspace) != 0) { + delete[] csr_rows; + delete[] csr_columns; + delete[] csr_values; + + assert(false && "invalid state after coo -> csr conversion"); + return -1; + } + + delete[] csr_rows; + delete[] csr_columns; + delete[] csr_values; + + return 0; + } + } // namespace matrix +} // namespace ReSolve diff --git a/resolve/matrix/Utilities.hpp b/resolve/matrix/Utilities.hpp new file mode 100644 index 00000000..285f054b --- /dev/null +++ b/resolve/matrix/Utilities.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include + +namespace ReSolve +{ + namespace matrix + { + // Forward declarations + class Coo; + class Csr; + + /// @brief Performs various cleanup operations on a COO matrix + int coo2coo(matrix::Coo* A, matrix::Coo* B, memory::MemorySpace memspace); + + /// @brief Converts symmetric or general COO to general CSR matrix + int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, memory::MemorySpace memspace); + } +} diff --git a/tests/unit/matrix/CMakeLists.txt b/tests/unit/matrix/CMakeLists.txt index 5b939056..d313248a 100644 --- a/tests/unit/matrix/CMakeLists.txt +++ b/tests/unit/matrix/CMakeLists.txt @@ -18,6 +18,10 @@ target_link_libraries(runMatrixHandlerTests.exe PRIVATE ReSolve resolve_matrix) add_executable(runMatrixFactorizationTests.exe runMatrixFactorizationTests.cpp) target_link_libraries(runMatrixFactorizationTests.exe PRIVATE ReSolve resolve_matrix) +# Build matrix conversion tests +add_executable(runMatrixConversionTests.exe runMatrixConversionTests.cpp) +target_link_libraries(runMatrixConversionTests.exe PRIVATE ReSolve resolve_matrix) + # Build LUSOL-related tests if(RESOLVE_USE_LUSOL) add_executable(runLUSOLTests.exe runLUSOLTests.cpp) @@ -25,7 +29,7 @@ if(RESOLVE_USE_LUSOL) endif() # Install tests -set(installable_tests runMatrixIoTests.exe runMatrixHandlerTests.exe runMatrixFactorizationTests.exe) +set(installable_tests runMatrixIoTests.exe runMatrixHandlerTests.exe runMatrixFactorizationTests.exe runMatrixConversionTests.exe) if(RESOLVE_USE_LUSOL) list(APPEND installable_tests runLUSOLTests.exe) endif() @@ -35,6 +39,7 @@ install(TARGETS ${installable_tests} add_test(NAME matrix_test COMMAND $) add_test(NAME matrix_handler_test COMMAND $) add_test(NAME matrix_factorization_test COMMAND $) +add_test(NAME matrix_conversion_test COMMAND $) if(RESOLVE_USE_LUSOL) add_test(NAME lusol_test COMMAND $) endif() diff --git a/tests/unit/matrix/MatrixConversionTests.hpp b/tests/unit/matrix/MatrixConversionTests.hpp new file mode 100644 index 00000000..cc66cf6a --- /dev/null +++ b/tests/unit/matrix/MatrixConversionTests.hpp @@ -0,0 +1,296 @@ +#pragma once + +#include + +#include +#include +#include +#include + +using index_type = ReSolve::index_type; +using real_type = ReSolve::real_type; + +namespace ReSolve +{ + namespace tests + { + /** + * @class Unit tests for matrix conversions + */ + class MatrixConversionTests : TestBase + { + public: + MatrixConversionTests() + { + } + + virtual ~MatrixConversionTests() + { + } + + TestOutcome simpleUpperUnexpandedSymmetricMatrix() + { + TestStatus status; + + matrix::Coo A(simple_upper_unexpanded_symmetric_n_, + simple_upper_unexpanded_symmetric_m_, + simple_upper_unexpanded_symmetric_nnz_, + true, + false); + A.updateData(simple_upper_unexpanded_symmetric_i_, + simple_upper_unexpanded_symmetric_j_, + simple_upper_unexpanded_symmetric_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_symmetric_expected_csr_n_, + simple_symmetric_expected_csr_m_, + simple_symmetric_expected_csr_nnz_, + simple_symmetric_expected_csr_i_, + simple_symmetric_expected_csr_j_, + simple_symmetric_expected_csr_a_); + + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_symmetric_expected_coo_col_n_, + simple_symmetric_expected_coo_col_m_, + simple_symmetric_expected_coo_col_nnz_, + simple_symmetric_expected_coo_col_i_, + simple_symmetric_expected_coo_col_j_, + simple_symmetric_expected_coo_col_a_); + + return status.report(__func__); + } + + TestOutcome simpleLowerUnexpandedSymmetricMatrix() + { + TestStatus status; + + matrix::Coo A(simple_lower_unexpanded_symmetric_n_, + simple_lower_unexpanded_symmetric_m_, + simple_lower_unexpanded_symmetric_nnz_, + true, + false); + A.updateData(simple_lower_unexpanded_symmetric_i_, + simple_lower_unexpanded_symmetric_j_, + simple_lower_unexpanded_symmetric_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_symmetric_expected_csr_n_, + simple_symmetric_expected_csr_m_, + simple_symmetric_expected_csr_nnz_, + simple_symmetric_expected_csr_i_, + simple_symmetric_expected_csr_j_, + simple_symmetric_expected_csr_a_); + + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_symmetric_expected_coo_col_n_, + simple_symmetric_expected_coo_col_m_, + simple_symmetric_expected_coo_col_nnz_, + simple_symmetric_expected_coo_col_i_, + simple_symmetric_expected_coo_col_j_, + simple_symmetric_expected_coo_col_a_); + + return status.report(__func__); + } + + TestOutcome simpleMainDiagonalOnlyMatrix() + { + TestStatus status; + + matrix::Coo A(simple_main_diagonal_only_n_, + simple_main_diagonal_only_m_, + simple_main_diagonal_only_nnz_, + true, + false); + A.updateData(simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_main_diagonal_only_n_, + simple_main_diagonal_only_m_, + simple_main_diagonal_only_nnz_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_a_); + + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_main_diagonal_only_n_, + simple_main_diagonal_only_m_, + simple_main_diagonal_only_nnz_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_i_j_, + simple_main_diagonal_only_a_); + + return status.report(__func__); + } + + TestOutcome simpleFullAsymmetricMatrix() + { + TestStatus status; + + matrix::Coo A(simple_asymmetric_n_, + simple_asymmetric_m_, + simple_asymmetric_nnz_); + A.updateData(simple_asymmetric_i_, + simple_asymmetric_j_, + simple_asymmetric_a_, + memory::HOST, + memory::HOST); + ReSolve::matrix::Csr B(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2csr(&A, &B, memory::HOST) == 0; + status *= this->verifyAnswer(&B, + simple_asymmetric_expected_csr_n_, + simple_asymmetric_expected_csr_m_, + simple_asymmetric_expected_csr_nnz_, + simple_asymmetric_expected_csr_i_, + simple_asymmetric_expected_csr_j_, + simple_asymmetric_expected_csr_a_); + + ReSolve::matrix::Coo C(A.getNumRows(), A.getNumColumns(), 0); + + status *= ReSolve::matrix::coo2coo(&A, &C, memory::HOST) == 0; + status *= this->verifyAnswer(&C, + simple_asymmetric_expected_coo_col_n_, + simple_asymmetric_expected_coo_col_m_, + simple_asymmetric_expected_coo_col_nnz_, + simple_asymmetric_expected_coo_col_i_, + simple_asymmetric_expected_coo_col_j_, + simple_asymmetric_expected_coo_col_a_); + + return status.report(__func__); + } + + private: + const index_type simple_symmetric_expected_csr_n_ = 5; + const index_type simple_symmetric_expected_csr_m_ = 5; + const index_type simple_symmetric_expected_csr_nnz_ = 8; + index_type simple_symmetric_expected_csr_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + index_type simple_symmetric_expected_csr_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + real_type simple_symmetric_expected_csr_a_[8] = {3.0, 7.0, 11.0, 7.0, 11.0, 7.0, 8.0, 8.0}; + + const index_type simple_symmetric_expected_coo_col_n_ = 5; + const index_type simple_symmetric_expected_coo_col_m_ = 5; + const index_type simple_symmetric_expected_coo_col_nnz_ = 8; + index_type simple_symmetric_expected_coo_col_i_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + index_type simple_symmetric_expected_coo_col_j_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + real_type simple_symmetric_expected_coo_col_a_[8] = {3.0, 7.0, 11.0, 7.0, 11.0, 7.0, 8.0, 8.0}; + + const index_type simple_upper_unexpanded_symmetric_n_ = 5; + const index_type simple_upper_unexpanded_symmetric_m_ = 5; + const index_type simple_upper_unexpanded_symmetric_nnz_ = 8; + index_type simple_upper_unexpanded_symmetric_i_[8] = {0, 0, 1, 1, 1, 1, 1, 3}; + index_type simple_upper_unexpanded_symmetric_j_[8] = {0, 0, 1, 1, 2, 2, 3, 4}; + real_type simple_upper_unexpanded_symmetric_a_[8] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + + const index_type simple_main_diagonal_only_n_ = 5; + const index_type simple_main_diagonal_only_m_ = 5; + const index_type simple_main_diagonal_only_nnz_ = 3; + index_type simple_main_diagonal_only_i_j_[3] = {1, 3, 4}; + real_type simple_main_diagonal_only_a_[3] = {1.0, 2.0, 3.0}; + + const index_type simple_lower_unexpanded_symmetric_n_ = 5; + const index_type simple_lower_unexpanded_symmetric_m_ = 5; + const index_type simple_lower_unexpanded_symmetric_nnz_ = 8; + index_type simple_lower_unexpanded_symmetric_i_[8] = {0, 0, 1, 1, 2, 2, 3, 4}; + index_type simple_lower_unexpanded_symmetric_j_[8] = {0, 0, 1, 1, 1, 1, 1, 3}; + real_type simple_lower_unexpanded_symmetric_a_[8] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; + + const index_type simple_asymmetric_n_ = 5; + const index_type simple_asymmetric_m_ = 5; + const index_type simple_asymmetric_nnz_ = 10; + index_type simple_asymmetric_i_[10] = {0, 1, 1, 2, 3, 3, 3, 4, 1, 1}; + index_type simple_asymmetric_j_[10] = {0, 1, 3, 1, 1, 4, 4, 3, 2, 2}; + real_type simple_asymmetric_a_[10] = {2.0, 4.0, 7.0, 9.0, 6.0, 7.0, 8.0, 8.0, 5.0, 6.0}; + + const index_type simple_asymmetric_expected_csr_n_ = 5; + const index_type simple_asymmetric_expected_csr_m_ = 5; + const index_type simple_asymmetric_expected_csr_nnz_ = 8; + index_type simple_asymmetric_expected_csr_i_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + index_type simple_asymmetric_expected_csr_j_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + real_type simple_asymmetric_expected_csr_a_[8] = {2.0, 4.0, 11.0, 7.0, 9.0, 6.0, 15.0, 8.0}; + + const index_type simple_asymmetric_expected_coo_col_n_ = 5; + const index_type simple_asymmetric_expected_coo_col_m_ = 5; + const index_type simple_asymmetric_expected_coo_col_nnz_ = 8; + index_type simple_asymmetric_expected_coo_col_i_[8] = {0, 1, 2, 3, 1, 1, 4, 3}; + index_type simple_asymmetric_expected_coo_col_j_[8] = {0, 1, 1, 1, 2, 3, 3, 4}; + real_type simple_asymmetric_expected_coo_col_a_[8] = {2.0, 4.0, 9.0, 6.0, 11.0, 7.0, 8.0, 15.0}; + + bool verifyAnswer(matrix::Csr* A, + const index_type& n, + const index_type& m, + const index_type& nnz, + index_type* is, + index_type* js, + real_type* as) + { + if (n != A->getNumRows() || m != A->getNumColumns() || nnz != A->getNnz()) { + return false; + } + + index_type* rows = A->getRowData(memory::HOST); + index_type* columns = A->getColData(memory::HOST); + real_type* values = A->getValues(memory::HOST); + + index_type answer_offset = 0; + for (index_type i = 0; i < A->getNumRows(); i++) { + for (index_type offset = rows[i]; offset < rows[i + 1]; offset++) { + if (i != is[answer_offset] || columns[offset] != js[answer_offset] || !isEqual(values[offset], as[answer_offset])) { + return false; + } + answer_offset++; + } + } + + return true; + } + + bool verifyAnswer(matrix::Coo* A, + const index_type& n, + const index_type& m, + const index_type& nnz, + index_type* is, + index_type* js, + real_type* as) + { + if (n != A->getNumRows() || m != A->getNumColumns() || nnz != A->getNnz()) { + return false; + } + + index_type* rows = A->getRowData(memory::HOST); + index_type* columns = A->getColData(memory::HOST); + real_type* values = A->getValues(memory::HOST); + + for (index_type i = 0; i < nnz; i++) { + if (rows[i] != is[i] || columns[i] != js[i] || !isEqual(values[i], as[i])) { + return false; + } + } + + return true; + } + }; + } // namespace tests +} // namespace ReSolve diff --git a/tests/unit/matrix/runMatrixConversionTests.cpp b/tests/unit/matrix/runMatrixConversionTests.cpp new file mode 100644 index 00000000..d8e9c156 --- /dev/null +++ b/tests/unit/matrix/runMatrixConversionTests.cpp @@ -0,0 +1,18 @@ +#include + +#include "MatrixConversionTests.hpp" + +int main() +{ + ReSolve::tests::TestingResults result; + ReSolve::tests::MatrixConversionTests test; + + result += test.simpleUpperUnexpandedSymmetricMatrix(); + result += test.simpleLowerUnexpandedSymmetricMatrix(); + result += test.simpleMainDiagonalOnlyMatrix(); + result += test.simpleFullAsymmetricMatrix(); + + std::cout << std::endl; + + return result.summary(); +}