Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Further experiments with mumps performance #65

Merged
merged 10 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions russell_lab/src/base/stopwatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ impl Stopwatch {
}
}

/// Stops the stopwatch and returns the elapsed time
/// Stops the stopwatch and returns the elapsed time in nanoseconds
///
/// # Output
///
/// Returns the elapsed time
/// Returns the elapsed time in nanoseconds
///
/// # Example
///
Expand Down
14 changes: 13 additions & 1 deletion russell_sparse/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ The output looks like this:

## <a name="issues"></a> MUMPS + OpenBLAS issue

We found that MUMPS + OpenBLAS enters an infinite loop when the number of OpenMP threads is left to be automatically set.
We found that MUMPS + OpenBLAS becomes very, very slow when the number of OpenMP threads is left automatic, i.e., using the available number of threads. Thus, with OpenBLAS, it is recommended to set LinSolParams.mumps_num_threads = 1 (this is automatically set when using OpenBLAS).

This issue has also been discovered by [1](#ref1), who states (page 72) _"We have observed that multi-threading of OpenBLAS library in MUMPS leads to multiple thread conflicts which sometimes result in significant slow-down of the solver."_

Expand All @@ -224,6 +224,18 @@ Therefore, we have to take one of the two approaches:

This issue has **not** been noticed with MUMPS + Intel MKL.

Command to reproduce the issue:

```bash
OMP_NUM_THREADS=20 ~/rust_modules/release/solve_matrix_market -g mumps ~/Downloads/matrix-market/inline_1.mtx -m 0 -v --override-prevent-issue
```

Also, to reproduce the issue, we need:

* Git hash = e020d9c8486502bd898d93a1998a0cf23c4d5057
* Remove Debian OpenBLAS, MUMPS, and etc.
* Install the compiled MUMPS solver with `02-ubuntu-openblas-compile.bash`

### References

1. <a name="ref1"></a> Dorozhinskii R (2019) [Configuration of a linear solver for linearly implicit time integration and efficient data transfer in parallel thermo-hydraulic computations](https://mediatum.ub.tum.de/doc/1486743/1486743.pdf). _Master's Thesis in Computational Science and Engineering._ Department of Informatics Technical University of Munich.
Expand Down
15 changes: 9 additions & 6 deletions russell_sparse/c_code/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
#include <inttypes.h>

#define SUCCESSFUL_EXIT 0
#define NULL_POINTER_ERROR 100000
#define MALLOC_ERROR 200000
#define VERSION_ERROR 300000
#define NOT_AVAILABLE 400000
#define NEED_FACTORIZATION 500000
#define C_BOOL int32_t
#define ERROR_NULL_POINTER 100000
#define ERROR_MALLOC 200000
#define ERROR_VERSION 300000
#define ERROR_NOT_AVAILABLE 400000
#define ERROR_NEED_INITIALIZATION 500000
#define ERROR_NEED_FACTORIZATION 600000
#define ERROR_ALREADY_INITIALIZED 700000
#define C_TRUE 1
#define C_FALSE 0

#define C_BOOL int32_t
162 changes: 85 additions & 77 deletions russell_sparse/c_code/interface_intel_dss.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,85 +74,95 @@ void solver_intel_dss_drop(struct InterfaceIntelDSS *solver) {
#endif
}

/// @brief Performs the factorization
/// @param solver Is a pointer to the solver interface
/// @note Output
/// @param determinant_coefficient determinant coefficient: det = coefficient * pow(2, exponent)
/// @param determinant_exponent determinant exponent: det = coefficient * pow(2, exponent)
/// @note Requests
/// @param compute_determinant Requests that determinant be computed
/// @note Matrix config
/// @param general_symmetric Whether the matrix is general symmetric (not necessarily positive-definite) or not
/// @param positive_definite Whether the matrix is symmetric and positive-definite or not
/// @param ndim Is the number of rows and columns of the coefficient matrix
/// @note Matrix
/// @param row_pointers The row pointers array with size = nrow + 1
/// @param col_indices The columns indices array with size = nnz (number of non-zeros)
/// @param values The values array with size = nnz (number of non-zeros)
/// @brief Defines the structure and reorder analysis
/// @return A success or fail code
int32_t solver_intel_dss_factorize(struct InterfaceIntelDSS *solver,
// output
double *determinant_coefficient,
double *determinant_exponent,
// requests
C_BOOL compute_determinant,
// matrix config
C_BOOL general_symmetric,
C_BOOL positive_definite,
int32_t ndim,
// matrix
const int32_t *row_pointers,
const int32_t *col_indices,
const double *values) {
int32_t solver_intel_dss_initialize(struct InterfaceIntelDSS *solver,
C_BOOL general_symmetric,
C_BOOL positive_definite,
int32_t ndim,
const int32_t *row_pointers,
const int32_t *col_indices) {
#ifdef WITH_INTEL_DSS

if (solver == NULL) {
return NULL_POINTER_ERROR;
return ERROR_NULL_POINTER;
}

// perform initialization
if (solver->initialization_completed == C_FALSE) {
solver->dss_opt = MKL_DSS_MSG_LVL_WARNING + MKL_DSS_TERM_LVL_ERROR + MKL_DSS_ZERO_BASED_INDEXING;
if (solver->initialization_completed == C_TRUE) {
return ERROR_ALREADY_INITIALIZED;
}

solver->dss_sym = MKL_DSS_NON_SYMMETRIC;
if (general_symmetric == C_TRUE) {
solver->dss_sym = MKL_DSS_SYMMETRIC;
}
solver->dss_opt = MKL_DSS_MSG_LVL_WARNING + MKL_DSS_TERM_LVL_ERROR + MKL_DSS_ZERO_BASED_INDEXING;

solver->dss_type = MKL_DSS_INDEFINITE;
if (positive_definite == C_TRUE) {
solver->dss_type = MKL_DSS_POSITIVE_DEFINITE;
}
solver->dss_sym = MKL_DSS_NON_SYMMETRIC;
if (general_symmetric == C_TRUE) {
solver->dss_sym = MKL_DSS_SYMMETRIC;
}

MKL_INT status = dss_create(solver->handle, solver->dss_opt);
solver->dss_type = MKL_DSS_INDEFINITE;
if (positive_definite == C_TRUE) {
solver->dss_type = MKL_DSS_POSITIVE_DEFINITE;
}

if (status != MKL_DSS_SUCCESS) {
return status;
}
MKL_INT status = dss_create(solver->handle, solver->dss_opt);

if (solver->handle == NULL) {
return NULL_POINTER_ERROR;
}
if (status != MKL_DSS_SUCCESS) {
return status;
}

solver->initialization_completed = C_TRUE;

// define the non-zero structure of the matrix
MKL_INT nnz = row_pointers[ndim];
status = dss_define_structure(solver->handle,
solver->dss_sym,
row_pointers,
ndim,
ndim,
col_indices,
nnz);
if (status != MKL_DSS_SUCCESS) {
return status;
}
if (solver->handle == NULL) {
return ERROR_NULL_POINTER;
}

// reorder the matrix (NOTE: we cannot call reorder again)
status = dss_reorder(solver->handle, solver->dss_opt, 0);
if (status != MKL_DSS_SUCCESS) {
return status;
}
// define the non-zero structure of the matrix
MKL_INT nnz = row_pointers[ndim];
status = dss_define_structure(solver->handle,
solver->dss_sym,
row_pointers,
ndim,
ndim,
col_indices,
nnz);
if (status != MKL_DSS_SUCCESS) {
return status;
}

// reorder the matrix (NOTE: we cannot call reorder again)
status = dss_reorder(solver->handle, solver->dss_opt, 0);
if (status != MKL_DSS_SUCCESS) {
return status;
}

solver->initialization_completed = C_TRUE;

return SUCCESSFUL_EXIT;

#else
UNUSED(solver);
UNUSED(general_symmetric);
UNUSED(positive_definite);
UNUSED(ndim);
UNUSED(row_pointers);
UNUSED(col_indices);
return ERROR_NOT_AVAILABLE;
#endif
}

/// @brief Performs the factorization
/// @return A success or fail code
int32_t solver_intel_dss_factorize(struct InterfaceIntelDSS *solver,
double *determinant_coefficient,
double *determinant_exponent,
C_BOOL compute_determinant,
const double *values) {
#ifdef WITH_INTEL_DSS

if (solver == NULL) {
return ERROR_NULL_POINTER;
}

if (solver->initialization_completed == C_FALSE) {
return ERROR_NEED_INITIALIZATION;
}

// factor the matrix
Expand All @@ -176,18 +186,14 @@ int32_t solver_intel_dss_factorize(struct InterfaceIntelDSS *solver,

// done
return status;

#else
UNUSED(solver);
UNUSED(determinant_coefficient);
UNUSED(determinant_exponent);
UNUSED(compute_determinant);
UNUSED(general_symmetric);
UNUSED(positive_definite);
UNUSED(ndim);
UNUSED(row_pointers);
UNUSED(col_indices);
UNUSED(values);
return NOT_AVAILABLE;
return ERROR_NOT_AVAILABLE;
#endif
}

Expand All @@ -200,16 +206,17 @@ int32_t solver_intel_dss_solve(struct InterfaceIntelDSS *solver,
double *x,
const double *rhs) {
#ifdef WITH_INTEL_DSS

if (solver == NULL) {
return NULL_POINTER_ERROR;
return ERROR_NULL_POINTER;
}

if (solver->handle == NULL) {
return NULL_POINTER_ERROR;
return ERROR_NULL_POINTER;
}

if (solver->factorization_completed == C_FALSE) {
return NEED_FACTORIZATION;
return ERROR_NEED_FACTORIZATION;
}

// get the solution vector
Expand All @@ -220,10 +227,11 @@ int32_t solver_intel_dss_solve(struct InterfaceIntelDSS *solver,
n_rhs,
x);
return status;

#else
UNUSED(solver);
UNUSED(x);
UNUSED(rhs);
return NOT_AVAILABLE;
return ERROR_NOT_AVAILABLE;
#endif
}
Loading
Loading