From dcfe5b25799289f16aa438fc8d6d96510d5e10c5 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Mon, 9 Sep 2024 16:11:24 -0600 Subject: [PATCH 01/25] only test on 3.10+ --- .github/workflows/python-package-conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index e276185..aaa5e0a 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -23,7 +23,7 @@ jobs: fail-fast: False matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12"] mkl-version: ['2023'] # currently 2024 fails building for some reason... include: - os: ubuntu-latest From 3224d4e0178a0303604ef43f6adec36ee9ab277b Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Mon, 9 Sep 2024 16:11:39 -0600 Subject: [PATCH 02/25] rely on b_coverage flag --- meson.options | 2 -- pydiso/meson.build | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/meson.options b/meson.options index f3968b5..6440a29 100644 --- a/meson.options +++ b/meson.options @@ -1,5 +1,3 @@ -option('cy_coverage', type : 'boolean', value : false) - option('use-sdl', type: 'boolean', value: true, description: 'Use the single dynamic library.') diff --git a/pydiso/meson.build b/pydiso/meson.build index fd7f01c..4371952 100644 --- a/pydiso/meson.build +++ b/pydiso/meson.build @@ -95,7 +95,7 @@ cython_c_args = [numpy_nodepr_api, use_math_defines]\ cython_file = 'mkl_solver.pyx' cython_file_full_path = meson.current_source_dir() / cython_file -if get_option('cy_coverage') +if get_option('b_coverage') # tell cython to enable linetracing add_project_arguments(['--directive', 'linetrace=true'], language : 'cython') # tell the c_compiler to definie the CYTHON_TRACE_NOGIL From 6073d53cb48b1e0208eac31bf797772f4cfebe9f Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Mon, 9 Sep 2024 16:43:41 -0600 Subject: [PATCH 03/25] updates for numpy 2.0.0 --- .github/workflows/python-package-conda.yml | 6 ++-- meson.build | 2 +- pydiso/meson.build | 42 ++-------------------- pyproject.toml | 27 ++------------ 4 files changed, 10 insertions(+), 67 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index aaa5e0a..e3b8438 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -55,7 +55,7 @@ jobs: - name: Create environment run: | - mamba install --quiet --yes pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \ + conda install --quiet --yes pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \ mkl-devel pkg-config meson-python meson ninja setuptools_scm \ ${{ matrix.coverage && 'coverage' || ''}} @@ -63,7 +63,7 @@ jobs: run: | python -m pip install --no-build-isolation --verbose --editable . \ --config-setting=compile-args=-v \ - ${{ matrix.coverage && '--config-settings=setup-args="-Dcy_coverage=true"' || ''}} + ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}} conda list - name: Run Tests @@ -107,7 +107,7 @@ jobs: - name: Create environment run: | - mamba install --quiet --yes pip numpy scipy cython mkl=2023 \ + conda install --quiet --yes pip numpy scipy cython mkl=2023 \ mkl-devel pkg-config meson-python meson ninja setuptools_scm \ python-build diff --git a/meson.build b/meson.build index 3303d66..29b269a 100644 --- a/meson.build +++ b/meson.build @@ -14,7 +14,7 @@ print(get_version())''' ).stdout().strip(), license: 'MIT', - meson_version: '>= 1.1.0', + meson_version: '>= 1.4.0', default_options: [ 'buildtype=debugoptimized', 'b_ndebug=if-release', diff --git a/pydiso/meson.build b/pydiso/meson.build index 4371952..67c9464 100644 --- a/pydiso/meson.build +++ b/pydiso/meson.build @@ -1,38 +1,5 @@ # NumPy include directory -# The try-except is needed because when things are -# split across drives on Windows, there is no relative path and an exception -# gets raised. There may be other such cases, so add a catch-all and switch to -# an absolute path. Relative paths are needed when for example a virtualenv is -# placed inside the source tree; Meson rejects absolute paths to places inside -# the source tree. -# For cross-compilation it is often not possible to run the Python interpreter -# in order to retrieve numpy's include directory. It can be specified in the -# cross file instead: -# [properties] -# numpy-include-dir = /abspath/to/host-pythons/site-packages/numpy/core/include -# -# This uses the path as is, and avoids running the interpreter. -incdir_numpy = meson.get_external_property('numpy-include-dir', 'not-given') -if incdir_numpy == 'not-given' - incdir_numpy = run_command(py, - [ - '-c', - '''import os -import numpy as np -try: - incdir = os.path.relpath(np.get_include()) -except Exception: - incdir = np.get_include() -print(incdir) - ''' - ], - check: true - ).stdout().strip() -else - _incdir_numpy_abs = incdir_numpy -endif -inc_np = include_directories(incdir_numpy) -np_dep = declare_dependency(include_directories: inc_np) +np_dep = dependency('numpy') # MKL-specific options mkl_dep_name = 'mkl-dynamic' @@ -87,13 +54,10 @@ else use_math_defines = [] endif -numpy_nodepr_api = '-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION' c_undefined_ok = ['-Wno-maybe-uninitialized'] - -cython_c_args = [numpy_nodepr_api, use_math_defines]\ +cython_c_args = [numpy_nodepr_api, use_math_defines, '-DCYTHON_CCOMPLEX=0'] cython_file = 'mkl_solver.pyx' -cython_file_full_path = meson.current_source_dir() / cython_file if get_option('b_coverage') # tell cython to enable linetracing @@ -103,6 +67,7 @@ if get_option('b_coverage') # compile the .c file from the .pyx file in it's directory. # These should include the default options passed to the cython compiler + cython_file_full_path = meson.current_source_dir() / cython_file run_command(cython, '-M', '--fast-fail', '-3', '--directive', 'linetrace=true', cython_file_full_path) endif @@ -111,7 +76,6 @@ module_path = 'pydiso' py.extension_module( 'mkl_solver', cython_file, - include_directories: incdir_numpy, c_args: cython_c_args, install: true, subdir: module_path, diff --git a/pyproject.toml b/pyproject.toml index 0178128..e0d3b8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,31 +2,10 @@ [build-system] build-backend = 'mesonpy' requires = [ - "meson-python>=0.14.0", - "Cython>=0.29.35", # when updating version, also update check in meson.build + "meson-python>=0.15.0", + "Cython>=3.0.8", "setuptools_scm[toml]>=6.2", - - # This package automatically provides all of the numpy pinning for different python - # versions and runtime requirements. - "oldest-supported-numpy", - - # The following is taken from scipy's pyproject.toml file to handle - # building against the proper numpy API - - # When numpy 2.0.0rc1 comes out, we should update this to build against 2.0, - # and then runtime depend on the range 1.22.X to <2.3. No need to switch to - # 1.25.2 in the meantime (1.25.x is the first version which exports older C - # API versions by default). - - # default numpy requirements - # "numpy==1.22.4; python_version<='3.10' and platform_python_implementation != 'PyPy'", - # "numpy==1.23.2; python_version=='3.11' and platform_python_implementation != 'PyPy'", - - # For Python versions which aren't yet officially supported, we specify an - # unpinned NumPy which allows source distributions to be used and allows - # wheels to be used as soon as they become available. - # "numpy>=1.26.0b1; python_version>='3.12'", - # "numpy; python_version>='3.8' and platform_python_implementation=='PyPy'", + "numpy>=2.0.0rc1", ] [project] From 117177bfe1361c47a9b76bd4503f9afcc1f4fe93 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Mon, 9 Sep 2024 16:48:13 -0600 Subject: [PATCH 04/25] test on latest-large --- .github/workflows/python-package-conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index e3b8438..2cc1305 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -22,7 +22,7 @@ jobs: strategy: fail-fast: False matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-latest-large, windows-latest] python-version: ["3.10", "3.11", "3.12"] mkl-version: ['2023'] # currently 2024 fails building for some reason... include: From 4d7f74f8bfe0304dba1e15e8f360f02f2a855f0d Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Mon, 9 Sep 2024 16:51:26 -0600 Subject: [PATCH 05/25] add numpy dep warning back --- .github/workflows/python-package-conda.yml | 2 +- pydiso/meson.build | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 2cc1305..8e1e6d8 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -22,7 +22,7 @@ jobs: strategy: fail-fast: False matrix: - os: [ubuntu-latest, macos-latest-large, windows-latest] + os: [ubuntu-latest, macos-12, windows-latest] python-version: ["3.10", "3.11", "3.12"] mkl-version: ['2023'] # currently 2024 fails building for some reason... include: diff --git a/pydiso/meson.build b/pydiso/meson.build index 67c9464..619a5dc 100644 --- a/pydiso/meson.build +++ b/pydiso/meson.build @@ -1,5 +1,6 @@ # NumPy include directory np_dep = dependency('numpy') +numpy_nodepr_api = ['-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION'] # MKL-specific options mkl_dep_name = 'mkl-dynamic' From b655371ef8ca15e9e63042e5b5b84543d038a19a Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Mon, 9 Sep 2024 17:02:04 -0600 Subject: [PATCH 06/25] update test --- tests/test_pydiso.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py index d5c715a..d4caf63 100644 --- a/tests/test_pydiso.py +++ b/tests/test_pydiso.py @@ -34,8 +34,8 @@ 'real_nonsymmetric': L@U2 } A_complex_dict = {'complex_structurally_symmetric': Lc@Uc, - 'complex_hermitian_positive_definite': Lc@Lc.H, - 'complex_hermitian_indefinite': Lc@D@Lc.H, + 'complex_hermitian_positive_definite': Lc@Lc.T.conjugate(), + 'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(), 'complex_symmetric': Lc@Lc.T, 'complex_nonsymmetric': Lc@U2c } From c8a681e2dd08f69fa5e3dbc201d2ee11298d2841 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Mon, 9 Sep 2024 23:44:29 -0600 Subject: [PATCH 07/25] test mkl 2024 (exclude on high macos versions --- .github/workflows/python-package-conda.yml | 11 ++++------- pydiso/mkl_solver.pyx | 10 +++++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 8e1e6d8..5913a53 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -24,17 +24,14 @@ jobs: matrix: os: [ubuntu-latest, macos-12, windows-latest] python-version: ["3.10", "3.11", "3.12"] - mkl-version: ['2023'] # currently 2024 fails building for some reason... + mkl-version: ['2023', '2024'] include: - os: ubuntu-latest python-version: "3.12" coverage: ${{ true }} - - os: ubuntu-latest - python-version: "3.12" - mkl-version: '2024' - - os: windows-latest - python-version: "3.12" - mkl-version: '2024' + exclude: + - os: macos-12 + mkl-version: "2024" steps: - uses: actions/checkout@v4 diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index bc82dd3..dc34bf4 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -1,7 +1,6 @@ #cython: language_level=3 -cimport numpy as np +cimport numpy as cnp import cython -from cython cimport numeric from cpython.pythread cimport ( PyThread_type_lock, PyThread_allocate_lock, @@ -403,8 +402,8 @@ cdef class MKLPardisoSolver: raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}") x = np.require(x, requirements='F') - cdef void * bp = np.PyArray_DATA(b) - cdef void * xp = np.PyArray_DATA(x) + cdef void * bp = cnp.PyArray_DATA(b) + cdef void * xp = cnp.PyArray_DATA(x) if bp == xp: raise PardisoError("b and x must be different arrays") @@ -510,7 +509,7 @@ cdef class MKLPardisoSolver: cdef _set_A(self, data): self._Adata = data - self.a = np.PyArray_DATA(data) + self.a = cnp.PyArray_DATA(data) def __dealloc__(self): # Need to call pardiso with phase=-1 to release memory @@ -542,6 +541,7 @@ cdef class MKLPardisoSolver: if self.lock: #dealloc lock PyThread_free_lock(self.lock) + self.lock = NULL cdef _analyze(self): #phase = 11 From 9914874a316492f1b70187df9862209f19c8ef14 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Thu, 12 Sep 2024 08:49:31 -0600 Subject: [PATCH 08/25] write out the dtype, temporarily remove a test --- tests/test_pydiso.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py index d4caf63..e319272 100644 --- a/tests/test_pydiso.py +++ b/tests/test_pydiso.py @@ -30,7 +30,7 @@ A_real_dict = {'real_structurally_symmetric': L@U, 'real_symmetric_positive_definite': L@L.T, - 'real_symmetric_indefinite': L@D@L.T, + #'real_symmetric_indefinite': L@D@L.T, 'real_nonsymmetric': L@U2 } A_complex_dict = {'complex_structurally_symmetric': Lc@Uc, @@ -86,6 +86,7 @@ def test_version(): @pytest.mark.parametrize("A, matrix_type", inputs) def test_solver(A, matrix_type): dtype = A.dtype + print(dtype) if np.issubdtype(dtype, np.complexfloating): x = xc.astype(dtype) else: From 537c209648362924fc21f2a12f499cf4f83d06d2 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Thu, 12 Sep 2024 13:13:12 -0600 Subject: [PATCH 09/25] use vsenv to build on windows --- .github/workflows/python-package-conda.yml | 3 ++- pydiso/mkl_solver.pyx | 1 - tests/test_pydiso.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 5913a53..ebac542 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -60,7 +60,8 @@ jobs: run: | python -m pip install --no-build-isolation --verbose --editable . \ --config-setting=compile-args=-v \ - ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}} + ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}} \ + ${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="--vsenv"' || ''}} conda list - name: Run Tests diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index dc34bf4..802e4fc 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -16,7 +16,6 @@ import os cdef extern from 'mkl.h': ctypedef long long MKL_INT64 - ctypedef unsigned long long MKL_UINT64 ctypedef int MKL_INT ctypedef MKL_INT int_t diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py index e319272..addd733 100644 --- a/tests/test_pydiso.py +++ b/tests/test_pydiso.py @@ -30,7 +30,7 @@ A_real_dict = {'real_structurally_symmetric': L@U, 'real_symmetric_positive_definite': L@L.T, - #'real_symmetric_indefinite': L@D@L.T, + 'real_symmetric_indefinite': L@D@L.T, 'real_nonsymmetric': L@U2 } A_complex_dict = {'complex_structurally_symmetric': Lc@Uc, From 3a5e30af1f10f9157d25d9b21d58788cb88fad3d Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 14:15:18 -0600 Subject: [PATCH 10/25] add some debug statements --- pydiso/mkl_solver.pyx | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index 802e4fc..f4e5021 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -301,6 +301,18 @@ cdef class MKLPardisoSolver: #self._is_32 = integer_len == sizeof(int_t) self._is_32 = sizeof(int_t) == 8 or integer_len == sizeof(int_t) + print("Am I calling _PardisoParams?") + print(self._is_32) + + print("What is the matrix's indices itemsize?") + print(integer_len) + + print("What is the matrix's indptr itemsize?") + print(A.indptr.itemsize) + + print("What is the size of MKL_INT") + print(sizeof(int_t)) + if self._is_32: self._par = _PardisoParams() self._initialize(self._par, A, matrix_type, verbose) @@ -458,6 +470,7 @@ cdef class MKLPardisoSolver: else: int_dtype = 'i8' par.n = A.shape[0] + print("Initializing permutation array with ", int_dtype, "dtype") par.perm = np.empty(par.n, dtype=int_dtype) par.maxfct = 1 From 85a914b99a98d254130013f9981258b5902be768 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 14:44:58 -0600 Subject: [PATCH 11/25] some more debug --- pydiso/mkl_solver.pyx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index f4e5021..ddacbdb 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -358,6 +358,7 @@ cdef class MKLPardisoSolver: self._factor() cdef _initialized(self): + # If any of the handle pointers are not null, return 1 cdef int i for i in range(64): if self.handle[i]: @@ -528,7 +529,10 @@ cdef class MKLPardisoSolver: cdef int_t phase=-1, nrhs=0, error=0 cdef long_t phase64=-1, nrhs64=0, error64=0 + print("Deallocating myself") + if self._initialized(): + print("I was initialized, so call pardiso to release the memory.") with nogil: PyThread_acquire_lock(self.lock, 1) if self._is_32: @@ -551,9 +555,13 @@ cdef class MKLPardisoSolver: self.handle[i] = NULL if self.lock: + print("I had a lock") #dealloc lock PyThread_free_lock(self.lock) self.lock = NULL + print("lock is no more lock") + + print("Deallocated myself") cdef _analyze(self): #phase = 11 From b8d82de0b6d547e9c0bc87da6517d3623c14ca9a Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 19:54:13 -0600 Subject: [PATCH 12/25] explicitly initialize numpy's c api --- pydiso/mkl_solver.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index ddacbdb..ca1b725 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -14,6 +14,8 @@ import numpy as np import scipy.sparse as sp import os +np.import_array() + cdef extern from 'mkl.h': ctypedef long long MKL_INT64 ctypedef int MKL_INT From 1c2e5f11e309adf5241d349937dc7ed067824aa8 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 19:55:04 -0600 Subject: [PATCH 13/25] fix --- pydiso/mkl_solver.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index ca1b725..5f6e54f 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -1,5 +1,5 @@ #cython: language_level=3 -cimport numpy as cnp +cimport numpy as np import cython from cpython.pythread cimport ( PyThread_type_lock, @@ -14,6 +14,8 @@ import numpy as np import scipy.sparse as sp import os +# We use np.PyArray_DATA to grab the pointer +# to a numpy array. np.import_array() cdef extern from 'mkl.h': From 17554b5ecaafb588085b61db3e942b050cae1b9f Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 19:57:58 -0600 Subject: [PATCH 14/25] fix more --- pydiso/mkl_solver.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index 5f6e54f..b15c352 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -418,8 +418,8 @@ cdef class MKLPardisoSolver: raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}") x = np.require(x, requirements='F') - cdef void * bp = cnp.PyArray_DATA(b) - cdef void * xp = cnp.PyArray_DATA(x) + cdef void * bp = np.PyArray_DATA(b) + cdef void * xp = np.PyArray_DATA(x) if bp == xp: raise PardisoError("b and x must be different arrays") @@ -526,7 +526,7 @@ cdef class MKLPardisoSolver: cdef _set_A(self, data): self._Adata = data - self.a = cnp.PyArray_DATA(data) + self.a = np.PyArray_DATA(data) def __dealloc__(self): # Need to call pardiso with phase=-1 to release memory From 68a5af3aeb9e14f5ea46efe244299b16e071fe6c Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 20:07:13 -0600 Subject: [PATCH 15/25] flush standard out --- tests/test_pydiso.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py index addd733..5f65254 100644 --- a/tests/test_pydiso.py +++ b/tests/test_pydiso.py @@ -94,6 +94,7 @@ def test_solver(A, matrix_type): b = A@x solver = Solver(A, matrix_type=matrix_type) + sys.stdout.flush() x2 = solver.solve(b) eps = np.finfo(dtype).eps From 579df3a4cda1b2c036225e815c5a05cda280ed0f Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 20:08:57 -0600 Subject: [PATCH 16/25] make more obvious when the test fails --- .github/workflows/python-package-conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index ebac542..e0a2b5b 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -68,7 +68,7 @@ jobs: run: | ${{ matrix.coverage && 'coverage run -m' || '' }} pytest -s -v ${{ matrix.coverage && 'coverage xml' || '' }} - continue-on-error: ${{ matrix.os == 'windows-latest' && matrix.mkl-version == '2024'}} +# continue-on-error: ${{ matrix.os == 'windows-latest' && matrix.mkl-version == '2024'}} - name: Upload coverage if: ${{ matrix.coverage }} From 5d47631c7555a26ce5318fea44271e2653b5812b Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 20:17:25 -0600 Subject: [PATCH 17/25] More! --- .github/workflows/python-package-conda.yml | 6 +++--- pydiso/mkl_solver.pyx | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index e0a2b5b..d2fd583 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -39,8 +39,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3 with: python-version: ${{ matrix.python-version }} - mamba-version: '*' - channels: conda-forge, defaults + channels: defaults channel-priority: true activate-environment: dev @@ -52,7 +51,8 @@ jobs: - name: Create environment run: | - conda install --quiet --yes pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \ + conda install --quiet --yes -c conda-forge \ + pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \ mkl-devel pkg-config meson-python meson ninja setuptools_scm \ ${{ matrix.coverage && 'coverage' || ''}} diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index b15c352..e7a075f 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -13,6 +13,7 @@ import warnings import numpy as np import scipy.sparse as sp import os +import sys # We use np.PyArray_DATA to grab the pointer # to a numpy array. @@ -418,9 +419,13 @@ cdef class MKLPardisoSolver: raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}") x = np.require(x, requirements='F') + print("Validated input arrays") + cdef void * bp = np.PyArray_DATA(b) cdef void * xp = np.PyArray_DATA(x) + print("assigned x and b pointers") + if bp == xp: raise PardisoError("b and x must be different arrays") @@ -430,6 +435,8 @@ cdef class MKLPardisoSolver: self.set_iparm(11, 2) else: self.set_iparm(11, 0) + + print("updated iparm 11") self._solve(bp, xp, nrhs) return x @@ -591,6 +598,8 @@ cdef class MKLPardisoSolver: if(not self._factored): raise PardisoError("Cannot solve without a previous factorization.") + print("trying to solve") + with nogil: err = self._run_pardiso(33, b, x, nrhs_in) @@ -604,9 +613,16 @@ cdef class MKLPardisoSolver: PyThread_acquire_lock(self.lock, 1) if self._is_32: + with gil: + print("Calling pardiso") + sys.stdout.flush() + pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype, &phase, &self._par.n, self.a, &self._par.ia[0], &self._par.ja[0], &self._par.perm[0], &nrhs, self._par.iparm, &self._par.msglvl, b, x, &error) + + with gil: + print("Called pardiso, error was ", error) else: pardiso_64(self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype, &phase64, &self._par64.n, self.a, &self._par64.ia[0], &self._par64.ja[0], From 9d870f266d326e1640a7f908b1b4c6eb91cb4790 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 21:23:44 -0600 Subject: [PATCH 18/25] Manually increment and decrement the x and b array's refcounts --- pydiso/mkl_solver.pyx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index e7a075f..cbada90 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -1,6 +1,10 @@ #cython: language_level=3 cimport numpy as np import cython +from cpython.ref cimport( + Py_INCREF, + Py_DECREF, +) from cpython.pythread cimport ( PyThread_type_lock, PyThread_allocate_lock, @@ -421,7 +425,9 @@ cdef class MKLPardisoSolver: print("Validated input arrays") + Py_INCREF(b) cdef void * bp = np.PyArray_DATA(b) + Py_INCREF(x) cdef void * xp = np.PyArray_DATA(x) print("assigned x and b pointers") @@ -437,7 +443,13 @@ cdef class MKLPardisoSolver: self.set_iparm(11, 0) print("updated iparm 11") - self._solve(bp, xp, nrhs) + try: + self._solve(bp, xp, nrhs) + except Exception as err: + raise err + finally: + Py_DECREF(b) + Py_DECREF(x) return x @property From 1edad1b923a71a86d69dd86aaaa769a2de81538d Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 21:34:19 -0600 Subject: [PATCH 19/25] flush here too --- pydiso/mkl_solver.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index cbada90..01ddf2d 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -433,6 +433,8 @@ cdef class MKLPardisoSolver: print("assigned x and b pointers") if bp == xp: + Py_DECREF(b) + Py_DECREF(x) raise PardisoError("b and x must be different arrays") cdef int_t nrhs = b.shape[1] if b.ndim == 2 else 1 @@ -635,6 +637,7 @@ cdef class MKLPardisoSolver: with gil: print("Called pardiso, error was ", error) + sys.stdout.flush() else: pardiso_64(self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype, &phase64, &self._par64.n, self.a, &self._par64.ia[0], &self._par64.ja[0], From 577203159c93f436c5206f92c50cf132e579f894 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 21:52:27 -0600 Subject: [PATCH 20/25] ensure all A matrix information is c-contiguous. --- pydiso/mkl_solver.pyx | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index 01ddf2d..1bfbe89 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -542,11 +542,11 @@ cdef class MKLPardisoSolver: par.iparm[55] = 0 # Internal function used to work with pivot and calculation of diagonal arrays turned off. par.iparm[59] = 0 # operate in-core mode - par.ia = np.require(A.indptr, dtype=int_dtype) - par.ja = np.require(A.indices, dtype=int_dtype) + par.ia = np.require(A.indptr, dtype=int_dtype, requirements='C') + par.ja = np.require(A.indices, dtype=int_dtype, requirements='C') cdef _set_A(self, data): - self._Adata = data + self._Adata = np.require(data, requirements='C') self.a = np.PyArray_DATA(data) def __dealloc__(self): @@ -629,6 +629,17 @@ cdef class MKLPardisoSolver: if self._is_32: with gil: print("Calling pardiso") + + print("maxfct", self._par.maxfct) + print("mnum", self._par.mnum) + print("mtype", self._par.mtype) + print("phase", phase) + print("n", self._par.n) + print("nrhs" nrhs) + print("msglvl", self._par.msglvl) + print("ia") + for i in range(self._par.n): + print() sys.stdout.flush() pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype, From 00871be032e9d76ca739bffd1996d98841f0fe09 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Fri, 13 Sep 2024 22:08:54 -0600 Subject: [PATCH 21/25] information --- pydiso/mkl_solver.pyx | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx index 1bfbe89..971ab52 100644 --- a/pydiso/mkl_solver.pyx +++ b/pydiso/mkl_solver.pyx @@ -635,11 +635,29 @@ cdef class MKLPardisoSolver: print("mtype", self._par.mtype) print("phase", phase) print("n", self._par.n) - print("nrhs" nrhs) + print("nrhs", nrhs) print("msglvl", self._par.msglvl) print("ia") - for i in range(self._par.n): - print() + n = self._par.n + for i in range(n+1): + print(self._par.ia[i]) + nnz = self._par.ia[n] + print("ja") + for i in range(nnz): + print(self._par.ja[i]) + + print("a") + for i in range(nnz): + print(self._Adata[i]) + if x: + print("x") + for i in range(n): + x[i] + if b: + print("b") + for i in range(n): + b[i] + sys.stdout.flush() pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype, From cd7ce76a5ababaadb3970c9df0e32b6d44011b96 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Wed, 18 Sep 2024 23:56:49 -0600 Subject: [PATCH 22/25] re-configure use template for integer type finalize add space don't worry about math_defines (not using anything special from it anyways) fix extra line continuation character fix meson build does tbb work? set seq as default use mkl for blas on windows --- .github/workflows/python-package-conda.yml | 6 +- pydiso/_mkl_solver.pyx.in | 299 +++++++++ pydiso/meson.build | 47 +- pydiso/mkl_solver.py | 352 +++++++++++ pydiso/mkl_solver.pyx | 676 --------------------- pyproject.toml | 2 +- tests/test_pydiso.py | 21 +- 7 files changed, 685 insertions(+), 718 deletions(-) create mode 100644 pydiso/_mkl_solver.pyx.in create mode 100644 pydiso/mkl_solver.py delete mode 100644 pydiso/mkl_solver.pyx diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index d2fd583..bfe46df 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -54,14 +54,16 @@ jobs: conda install --quiet --yes -c conda-forge \ pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \ mkl-devel pkg-config meson-python meson ninja setuptools_scm \ - ${{ matrix.coverage && 'coverage' || ''}} + ${{ matrix.coverage && 'coverage' || ''}} \ + ${{ matrix.os == 'windows-latest' && '"libblas=*=*mkl"' || ''}} - name: Install Our Package run: | python -m pip install --no-build-isolation --verbose --editable . \ --config-setting=compile-args=-v \ ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}} \ - ${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="--vsenv"' || ''}} + ${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="-Dvsenv=true"' || ''}} + conda list - name: Run Tests diff --git a/pydiso/_mkl_solver.pyx.in b/pydiso/_mkl_solver.pyx.in new file mode 100644 index 0000000..e7449b2 --- /dev/null +++ b/pydiso/_mkl_solver.pyx.in @@ -0,0 +1,299 @@ +#cython: language_level=3 +cimport numpy as np +import cython +from cpython.pythread cimport ( + PyThread_type_lock, + PyThread_allocate_lock, + PyThread_acquire_lock, + PyThread_release_lock, + PyThread_free_lock +) + +import numpy as np +import os + +# We use np.PyArray_DATA to grab the pointer +# to a numpy array. +np.import_array() + +cdef extern from 'mkl.h': + ctypedef long long MKL_INT64 + ctypedef int MKL_INT + +ctypedef MKL_INT int_t +ctypedef MKL_INT64 long_t + +cdef extern from 'mkl.h': + int MKL_DOMAIN_PARDISO + + ctypedef struct MKLVersion: + int MajorVersion + int MinorVersion + int UpdateVersion + char * ProductStatus + char * Build + char * Processor + char * Platform + + void mkl_get_version(MKLVersion* pv) + + void mkl_set_num_threads(int nth) + int mkl_domain_set_num_threads(int nt, int domain) + int mkl_get_max_threads() + int mkl_domain_get_max_threads(int domain) + + ctypedef int (*ProgressEntry)(int* thread, int* step, char* stage, int stage_len) except? -1; + ProgressEntry mkl_set_progress(ProgressEntry progress); + + ctypedef void * _MKL_DSS_HANDLE_t + + void pardiso(_MKL_DSS_HANDLE_t, const int_t*, const int_t*, const int_t*, + const int_t *, const int_t *, const void *, const int_t *, + const int_t *, int_t *, const int_t *, int_t *, + const int_t *, void *, void *, int_t *) nogil + + void pardiso_64(_MKL_DSS_HANDLE_t, const long_t *, const long_t *, const long_t *, + const long_t *, const long_t *, const void *, const long_t *, + const long_t *, long_t *, const long_t *, long_t *, + const long_t *, void *, void *, long_t *) nogil + + +_err_messages = {0:"no error", + -1:'input inconsistent', + -2:'not enough memory', + -3:'reordering problem', + -4:'zero pivot, numerical factorization or iterative refinement problem', + -5:'unclassified (internal) error', + -6:'reordering failed', + -7:'diagonal matrix is singular', + -8:'32-bit integer overflow problem', + -9:'not enough memory for OOC', + -10:'error opening OOC files', + -11:'read/write error with OOC files', + -12:'pardiso_64 called from 32-bit library', + } + +class PardisoError(Exception): + pass + +class PardisoWarning(UserWarning): + pass + + +#call pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, perm, nrhs, iparm, msglvl, b, x, error) +cdef int mkl_progress(int *thread, int* step, char* stage, int stage_len) nogil: + # must be a nogil process to pass to mkl pardiso progress reporting + with gil: + # must reacquire the gil to print out back to python. + print(thread[0], step[0], stage, stage_len) + return 0 + +cdef int mkl_no_progress(int *thread, int* step, char* stage, int stage_len) nogil: + return 0 + + +def get_mkl_max_threads(): + """ + Returns the current number of openMP threads available to the MKL Library + """ + return mkl_get_max_threads() + +def get_mkl_pardiso_max_threads(): + """ + Returns the current number of openMP threads available to the Pardiso functions + """ + return mkl_domain_get_max_threads(MKL_DOMAIN_PARDISO) + +def set_mkl_threads(num_threads=None): + """ + Sets the number of openMP threads available to the MKL library. + + Parameters + ---------- + num_threads : None or int + number of threads to use for the MKL library. + None will set the number of threads to that returned by `os.cpu_count()`. + """ + if num_threads is None: + num_threads = os.cpu_count() + elif num_threads<=0: + raise ValueError('Number of threads must be greater than 0') + mkl_set_num_threads(num_threads) + +def set_mkl_pardiso_threads(num_threads=None): + """ + Sets the number of openMP threads available to the Pardiso functions + + Parameters + ---------- + num_threads : None or int + Number of threads to use for the MKL Pardiso routines. + None (or 0) will set the number of threads to `get_mkl_max_threads` + """ + if num_threads is None: + num_threads = 0 + elif num_threads<0: + raise ValueError('Number of threads must be greater than 0') + mkl_domain_set_num_threads(num_threads, MKL_DOMAIN_PARDISO) + +def get_mkl_version(): + """ + Returns a dictionary describing the version of Intel Math Kernel Library used + """ + cdef MKLVersion vers + mkl_get_version(&vers) + return vers + +def get_mkl_int_size(): + """Return the size of the MKL_INT at compile time in bytes. + + Returns + ------- + int + """ + return sizeof(MKL_INT) + + +def get_mkl_int64_size(): + """Return the size of the MKL_INT64 at compile time in bytes. + + Returns + ------- + int + """ + return sizeof(MKL_INT64) + + + +ctypedef fused real_or_complex: + np.float32_t + np.float64_t + np.complex64_t + np.complex128_t + + +{{for int_type in ["int_t", "long_t"]}} +cdef class _PardisoHandle_{{int_type}}: + cdef _MKL_DSS_HANDLE_t handle[64] + cdef PyThread_type_lock lock + + cdef {{int_type}} n, maxfct, mnum, msglvl + cdef public {{int_type}} matrix_type + cdef public {{int_type}}[64] iparm + cdef public {{int_type}}[:] perm + + @cython.boundscheck(False) + def __cinit__(self, A_dat_dtype, n, matrix_type, maxfct, mnum, msglvl): + self.lock = PyThread_allocate_lock() + + np_int_dtype = np.dtype(f"i{sizeof({{int_type}})}") + + for i in range(64): + self.handle[i] = NULL + + self.n = n + self.matrix_type = matrix_type + self.maxfct = maxfct + self.mnum = mnum + self.msglvl = msglvl + + if self.msglvl: + #for reporting factorization progress via python's `print` + mkl_set_progress(mkl_progress) + else: + mkl_set_progress(mkl_no_progress) + + is_single_precision = np.issubdtype(A_dat_dtype, np.single) or np.issubdtype(A_dat_dtype, np.csingle) + + self.perm = np.empty(self.n, dtype=np_int_dtype) + + for i in range(64): + self.iparm[i] = 0 # ensure these all start at 0 + + # set default parameters + self.iparm[0] = 1 # tell pardiso to not reset these values on the first call + self.iparm[1] = 2 # The nested dissection algorithm from the METIS + self.iparm[3] = 0 # The factorization is always computed as required by phase. + self.iparm[4] = 2 # fill perm with computed permutation vector + self.iparm[5] = 0 # The array x contains the solution; right-hand side vector b is kept unchanged. + self.iparm[7] = 0 # The solver automatically performs two steps of iterative refinement when perterbed pivots are obtained + self.iparm[9] = 13 if matrix_type in [11, 13] else 8 + self.iparm[10] = 1 if matrix_type in [11, 13] else 0 + self.iparm[11] = 0 # Solve a linear system AX = B (as opposed to A.T or A.H) + self.iparm[12] = 1 if matrix_type in [11, 13] else 0 + self.iparm[17] = -1 # Return the number of non-zeros in this value after first call + self.iparm[18] = 0 # do not report flop count + self.iparm[20] = 1 if matrix_type in [-2, -4, 6] else 0 + self.iparm[23] = 0 # classic (not parallel) factorization + self.iparm[24] = 0 # default behavoir of parallel solving + self.iparm[26] = 1 # Do not check the input matrix + self.iparm[27] = is_single_precision # 1 if single, 0 if double + self.iparm[30] = 0 # this would be used to enable sparse input/output for solves + self.iparm[33] = 0 # optimal number of thread for CNR mode + self.iparm[34] = 1 # zero based indexing + self.iparm[35] = 0 # Do not compute schur complement + self.iparm[36] = 0 # use CSR storage format + self.iparm[38] = 0 # Do not use low rank update + self.iparm[42] = 0 # Do not compute the diagonal of the inverse + self.iparm[55] = 0 # Internal function used to work with pivot and calculation of diagonal arrays turned off. + self.iparm[59] = 0 # operate in-core mode + + def initialized(self): + return self._initialized() + + cdef int _initialized(self) noexcept nogil: + # If any of the handle pointers are not null, return 1 + cdef int i + for i in range(64): + if self.handle[i]: + return 1 + return 0 + + def set_iparm(self, {{int_type}} i, {{int_type}} val): + self.iparm[i] = val + + @cython.boundscheck(False) + cpdef {{int_type}} call_pardiso(self, + {{int_type}} phase, + real_or_complex[::1] a_data, + {{int_type}}[::1] a_indptr, + {{int_type}}[::1] a_indices, + real_or_complex[::1, :] rhs, + real_or_complex[::1, :] out + ): + cdef {{int_type}} error, nrhs + with nogil: + nrhs = rhs.shape[1] + PyThread_acquire_lock(self.lock, mode=1) + pardiso{{if int_type == "long_t"}}_64{{endif}}( + self.handle, &self.maxfct, &self.mnum, &self.matrix_type, &phase, &self.n, + &a_data[0], &a_indptr[0], &a_indices[0], &self.perm[0], + &nrhs, self.iparm, &self.msglvl, + &rhs[0, 0], &out[0, 0], &error + ) + PyThread_release_lock(self.lock) + return error + + @cython.boundscheck(False) + def __dealloc__(self): + # Need to call pardiso with phase=-1 to release memory (if it was initialized) + cdef {{int_type}} phase = -1, nrhs = 0, error = 0 + + with nogil: + PyThread_acquire_lock(self.lock, mode=1) + if self._initialized(): + pardiso{{if int_type == "long_t"}}_64{{endif}}( + self.handle, &self.maxfct, &self.mnum, &self.matrix_type, + &phase, &self.n, NULL, NULL, NULL, NULL, &nrhs, self.iparm, + &self.msglvl, NULL, NULL, &error) + if error == 0: + for i in range(64): + self.handle[i] = NULL + PyThread_release_lock(self.lock) + if error != 0: + raise MemoryError("Pardiso Memory release error: " + _err_messages[error]) + if self.lock: + #deallocate the lock + PyThread_free_lock(self.lock) + self.lock = NULL +{{endfor}} \ No newline at end of file diff --git a/pydiso/meson.build b/pydiso/meson.build index 619a5dc..5bdd8a9 100644 --- a/pydiso/meson.build +++ b/pydiso/meson.build @@ -1,3 +1,19 @@ +cython_file = custom_target( + input: '_mkl_solver.pyx.in', + output: '_mkl_solver.pyx', + command: [py, + '-c', +''' +import sys +from pathlib import Path +from Cython.Tempita import sub + +template = Path(sys.argv[1]).read_text("utf8") +output = sub(template) +Path(sys.argv[2]).write_text(output, "utf8") +''', '@INPUT@', '@OUTPUT@'] +) + # NumPy include directory np_dep = dependency('numpy') numpy_nodepr_api = ['-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION'] @@ -15,7 +31,6 @@ endif # MKL-specific options _threading_opt = get_option('mkl-threading') if _threading_opt == 'auto' - # openmp.pc not included with conda-forge distribution (yet) mkl_dep_name += '-seq' else mkl_dep_name += '-' + _threading_opt @@ -45,37 +60,13 @@ else endif -# Deal with M_PI & friends; add `use_math_defines` to c_args or cpp_args -# Cython doesn't always get this right itself (see, e.g., gh-16800), so -# explicitly add the define as a compiler flag for Cython-generated code. -is_windows = host_machine.system() == 'windows' -if is_windows - use_math_defines = ['-D_USE_MATH_DEFINES'] -else - use_math_defines = [] -endif - c_undefined_ok = ['-Wno-maybe-uninitialized'] -cython_c_args = [numpy_nodepr_api, use_math_defines, '-DCYTHON_CCOMPLEX=0'] - -cython_file = 'mkl_solver.pyx' - -if get_option('b_coverage') - # tell cython to enable linetracing - add_project_arguments(['--directive', 'linetrace=true'], language : 'cython') - # tell the c_compiler to definie the CYTHON_TRACE_NOGIL - add_project_arguments(['-DCYTHON_TRACE_NOGIL=1'], language : 'c') - - # compile the .c file from the .pyx file in it's directory. - # These should include the default options passed to the cython compiler - cython_file_full_path = meson.current_source_dir() / cython_file - run_command(cython, '-M', '--fast-fail', '-3', '--directive', 'linetrace=true', cython_file_full_path) -endif +cython_c_args = [numpy_nodepr_api, '-DCYTHON_CCOMPLEX=0'] module_path = 'pydiso' py.extension_module( - 'mkl_solver', + '_mkl_solver', cython_file, c_args: cython_c_args, install: true, @@ -84,7 +75,7 @@ py.extension_module( ) python_sources = [ - '__init__.py', + '__init__.py', 'mkl_solver.py' ] py.install_sources( diff --git a/pydiso/mkl_solver.py b/pydiso/mkl_solver.py new file mode 100644 index 0000000..a4cf6a7 --- /dev/null +++ b/pydiso/mkl_solver.py @@ -0,0 +1,352 @@ +import numpy as np +import scipy.sparse as sp +from ._mkl_solver import ( + _PardisoHandle_int_t, + _PardisoHandle_long_t, + get_mkl_int_size, + get_mkl_int64_size, + get_mkl_max_threads, + get_mkl_pardiso_max_threads, + set_mkl_threads, + set_mkl_pardiso_threads, + get_mkl_version, + _err_messages, + PardisoWarning, + PardisoError, +) +import warnings + + +MATRIX_TYPES ={ + 'real_structurally_symmetric': 1, + 'real_symmetric_positive_definite': 2, + 'real_symmetric_indefinite': -2, + 'complex_structurally_symmetric': 3, + 'complex_hermitian_positive_definite': 4, + 'complex_hermitian_indefinite': -4, + 'complex_symmetric': 6, + 'real_nonsymmetric': 11, + 'complex_nonsymmetric': 13} +"""dict : matrix type string keys and corresponding integer value to describe +the different supported matrix types. +""" + + +class PardisoTypeConversionWarning( + PardisoWarning, sp.SparseEfficiencyWarning): + pass + + +def _ensure_csr(A, sym=False): + if not (sp.isspmatrix_csr(A)): + if sym and sp.isspmatrix_csc(A): + A = A.T + else: + warnings.warn("Converting %s matrix to CSR format." + %A.__class__.__name__, PardisoTypeConversionWarning) + A = A.tocsr() + return A + + +class MKLPardisoSolver: + + def __init__(self, A, matrix_type=None, factor=True, verbose=False): + '''ParidsoSolver(A, matrix_type=None, factor=True, verbose=False) + An interface to the intel MKL pardiso sparse matrix solver. + + This is a solver class for a scipy sparse matrix using the Pardiso sparse + solver in the Intel Math Kernel Library. + + It will factorize the sparse matrix in three steps: a symbolic + factorization stage, a numerical factorization stage, and a solve stage. + + The purpose is to construct a sparse factorization that can be repeatedly + called to solve for multiple right-hand sides. + + Parameters + ---------- + A : scipy.sparse.spmatrix + A sparse matrix preferably in a CSR format. + matrix_type : str, int, or None, optional + A string describing the matrix type, or it's corresponding int code. + If None, then assumed to be nonsymmetric matrix. + factor : bool, optional + Whether to perform the factorization stage upon instantiation of the class. + verbose : bool, optional + Enable verbose output from the pardiso solver. + + Notes + ----- + + The supported matrix types are: real symmetric positive definite, real + symmetric indefinite, real structurally symmetric, real nonsymmetric, + complex hermitian positive definite, complex hermitian indefinite, complex + symmetric, complex structurally symmetric, and complex nonsymmetric. + The solver supports both single and double precision matrices. + + Examples + -------- + + Solve a symmetric positive definite system by first forming a simple 5 point + laplacian stencil with a zero boundary condition. Then we create a known + solution vector to compare to result with. + + >>> import scipy.sparse as sp + >>> from pydiso.mkl_solver import MKLPardisoSolver + >>> nx, ny = 5, 7 + >>> Dx = sp.diags((-1, 1), (-1, 0), (nx+1, nx)) + >>> Dy = sp.diags((-1, 1), (-1, 0), (ny+1, ny)) + >>> A = sp.kron(sp.eye(nx), Dy.T @ Dy) + sp.kron(Dx.T @ Dx, sp.eye(ny)) + >>> x = np.linspace(-10, 10, nx*ny) + >>> b = A @ x + + Next we create the solver object using pardiso + + >>> Ainv = MKLPardisoSolver(A, matrix_type='real_symmetric_positive_definite') + >>> x_solved = Ainv.solve(b) + >>> np.allclose(x, x_solved) + True + ''' + if not sp.issparse(A): + raise TypeError(f"type(A)={type(A).__name__} must be a sparse array or sparse matrix.") + + if A.ndim != 2: + raise ValueError(f"A.ndim={A.ndim} must be to 2.") + + n_row, n_col = A.shape + if n_row != n_col: + raise ValueError(f"A with shape {A.shape} is not a square matrix.") + self.shape = n_row, n_col + + data_dtype = A.dtype + if not( + np.issubdtype(data_dtype, np.single) or + np.issubdtype(data_dtype, np.double) or + np.issubdtype(data_dtype, np.csingle) or + np.issubdtype(data_dtype, np.cdouble) + ): + raise ValueError( + f"Unrecognized matrix data type, {data_dtype}. Must be single or double precision of real or complex values." + ) + self._data_dtype = data_dtype + + is_complex = np.issubdtype(data_dtype, np.complexfloating) + + if matrix_type is None: + if is_complex: + matrix_type = MATRIX_TYPES['complex_nonsymmetric'] + else: + matrix_type = MATRIX_TYPES['real_nonsymmetric'] + + if not(matrix_type in MATRIX_TYPES or matrix_type in MATRIX_TYPES.values()): + raise TypeError(f'Unrecognized matrix_type: {matrix_type}') + if matrix_type in MATRIX_TYPES: + matrix_type = MATRIX_TYPES[matrix_type] + + if matrix_type in [1, 2, -2, 11]: + if is_complex: + raise ValueError( + f"Complex matrix dtype and matrix_type={matrix_type} are inconsistent, expected a real matrix" + ) + else: + if not is_complex: + raise ValueError( + f"Real matrix dtype and matrix_type={matrix_type} are inconsistent, expected a complex matrix" + ) + + self.matrix_type = matrix_type + + indptr = np.asarray(A.indptr) # double check it's a numpy array + mkl_int_size= get_mkl_int_size() + mkl_int64_size = get_mkl_int64_size() + + target_int_size = mkl_int_size if indptr.itemsize <= mkl_int_size else mkl_int64_size + self._ind_dtype = np.dtype(f"i{target_int_size}") + + data, indptr, indices = self._validate_matrix(A) + self._data = data + self._indptr = indptr + self._indices = indices + + if target_int_size == mkl_int_size: + HandleClass = _PardisoHandle_int_t + else: + HandleClass = _PardisoHandle_long_t + self._handle = HandleClass(self._data_dtype, self.shape[0], matrix_type, maxfct=1, mnum=1, msglvl=verbose) + + self._analyze() + self._factored = False + if factor: + self._factor() + + def refactor(self, A): + """solver.refactor(A) + re-use a symbolic factorization with a new `A` matrix. + + Note + ---- + Must have the same non-zero pattern as the initial `A` matrix. + If `full_refactor=False`, the initial factorization is used as a + preconditioner to a Krylov subspace solver in the solve step. + + Parameters + ---------- + A : scipy.sparse.spmatrix + A sparse matrix preferably in a CSR format. + """ + #Assumes that the matrix A has the same non-zero pattern and ordering + #as the initial A matrix + + if not sp.issparse(A): + raise TypeError("A is not a sparse matrix.") + if A.shape != self.shape: + raise ValueError("A is not the same size as the previous matrix.") + data, indptr, indices = self._validate_matrix(A) + if len(data) != len(self._data): + raise ValueError("new A matrix does not have the same number of non zeros.") + + self._data = data + self._factor() + + def __call__(self, b): + return self.solve(b) + + def solve(self, b, x=None, transpose=False): + """solve(self, b, x=None, transpose=False) + Solves the equation AX=B using the factored A matrix + + Note + ---- + The data will be copied if not contiguous in all cases. If multiple rhs + are given, the input arrays will be copied if not in a contiguous + Fortran order. + + Parameters + ---------- + b : numpy.ndarray + array of shape 1D or 2D for the right hand side of the equation + (of the same data type as A). + x : numpy.ndarray, optional + A pre-allocated output array (of the same data type as A). + If None, a new array is constructed. + transpose : bool, optional + If True, it will solve A^TX=B using the factored A matrix. + + Returns + ------- + numpy array + array containing the solution (in Fortran ordering) + """ + if(not self._factored): + raise PardisoError("Cannot solve without a previous factorization.") + + if b.dtype != self._data_dtype: + warnings.warn("rhs does not have the same data type as A", + PardisoTypeConversionWarning) + b = b.astype(self._data_dtype) + b = np.atleast_1d(b) + b_was_1d = b.ndim == 1 + if b_was_1d: + b = b[:, None] + if b.ndim != 2: + raise ValueError(f"b.ndim={b.ndim} must be 1 or 2.") + if b.shape[0] != self.shape[0]: + raise ValueError(f"incorrect length of b, expected {self.shape[0]}, got {b.shape[0]}") + b = np.require(b, requirements='F') + + if x is None: + x = np.empty_like(b) + x_was_1d = b_was_1d + else: + if(x.dtype!=self._data_dtype): + warnings.warn("output does not have the same data type as A", + PardisoTypeConversionWarning) + x = x.astype(self._data_dtype) + x = np.atleast_1d(x) + x_was_1d = x.ndim == 1 + if x_was_1d: + x = x[:, None] + if x.ndim != 2: + raise ValueError(f"x.ndim={x.ndim} must be 1 or 2.") + if x.shape[0] != self.shape[0]: + raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}") + x = np.require(x, requirements='F') + + if b.shape[1] != x.shape[1]: + raise ValueError( + f"Inconsistent shapes of right hand side, {b.shape} and output vector, {x.shape}") + + if x is b or (x.base is not None and (x.base is b.base)): + raise ValueError("x and b cannot point to the same memory") + + self._handle.set_iparm(11, 2 if transpose else 0) + + phase = 33 + error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, b, x) + if error: + raise PardisoError("Solve step error, "+_err_messages[error]) + if x_was_1d: + x = x[:, 0] + return x + + @property + def perm(self): + """ Fill-reducing permutation vector used inside pardiso. + """ + return np.array(self._handle.perm) + + @property + def iparm(self): + """ Parameter options for the pardiso solver. + """ + return np.array(self._handle.iparm) + + def _validate_matrix(self, mat): + + if self.matrix_type in [-2, 2, -4, 4, 6]: + # Symmetric matrices must have only the upper triangle + if sp.isspmatrix_csc(mat): + mat = mat.T # Transpose to get a CSR matrix since it's symmetric + mat = sp.triu(mat, format='csr') + mat = _ensure_csr(mat) + mat.sort_indices() + mat.sum_duplicates() + + data = np.require(mat.data, self._data_dtype, requirements="C") + indptr = np.require(mat.indptr, self._ind_dtype, requirements="C") + indices = np.require(mat.indices, self._ind_dtype, requirements="C") + return data, indptr, indices + + + def set_iparm(self, i, val): + if i > 63 or i < 0: + raise IndexError(f"index {i} is out of bounds for size 64 array") + if i not in [ + 1, 3, 4, 5, 7, 9, 10, 11, 12, 17, 18, 20, 23, + 24, 26, 30, 33, 34, 35, 36, 38, 42, 55, 59 + ]: + raise ValueError(f"cannot set parameter {i} of the iparm array") + + self._handle.set_iparm(i, val) + + @property + def nnz(self): + return self._handle.iparm[17] + + def _analyze(self): + phase = 11 + xb_dummy = np.empty([1,1], dtype=self._data_dtype) + error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy) + if error: + raise PardisoError("Analysis step error, "+_err_messages[error]) + + def _factor(self): + phase = 22 + self._factored = False + xb_dummy = np.empty([1,1], dtype=self._data_dtype) + error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy) + + if error: + raise PardisoError("Factor step error, "+_err_messages[error]) + + self._factored = True \ No newline at end of file diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx deleted file mode 100644 index 971ab52..0000000 --- a/pydiso/mkl_solver.pyx +++ /dev/null @@ -1,676 +0,0 @@ -#cython: language_level=3 -cimport numpy as np -import cython -from cpython.ref cimport( - Py_INCREF, - Py_DECREF, -) -from cpython.pythread cimport ( - PyThread_type_lock, - PyThread_allocate_lock, - PyThread_acquire_lock, - PyThread_release_lock, - PyThread_free_lock -) - -import warnings -import numpy as np -import scipy.sparse as sp -import os -import sys - -# We use np.PyArray_DATA to grab the pointer -# to a numpy array. -np.import_array() - -cdef extern from 'mkl.h': - ctypedef long long MKL_INT64 - ctypedef int MKL_INT - -ctypedef MKL_INT int_t -ctypedef MKL_INT64 long_t - -cdef extern from 'mkl.h': - int MKL_DOMAIN_PARDISO - - ctypedef struct MKLVersion: - int MajorVersion - int MinorVersion - int UpdateVersion - char * ProductStatus - char * Build - char * Processor - char * Platform - - void mkl_get_version(MKLVersion* pv) - - void mkl_set_num_threads(int nth) - int mkl_domain_set_num_threads(int nt, int domain) - int mkl_get_max_threads() - int mkl_domain_get_max_threads(int domain) - - ctypedef int (*ProgressEntry)(int* thread, int* step, char* stage, int stage_len) except? -1; - ProgressEntry mkl_set_progress(ProgressEntry progress); - - ctypedef void * _MKL_DSS_HANDLE_t - - void pardiso(_MKL_DSS_HANDLE_t, const int_t*, const int_t*, const int_t*, - const int_t *, const int_t *, const void *, const int_t *, - const int_t *, int_t *, const int_t *, int_t *, - const int_t *, void *, void *, int_t *) nogil - - void pardiso_64(_MKL_DSS_HANDLE_t, const long_t *, const long_t *, const long_t *, - const long_t *, const long_t *, const void *, const long_t *, - const long_t *, long_t *, const long_t *, long_t *, - const long_t *, void *, void *, long_t *) nogil - - -#call pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, perm, nrhs, iparm, msglvl, b, x, error) -cdef int mkl_progress(int *thread, int* step, char* stage, int stage_len) nogil: - # must be a nogil process to pass to mkl pardiso progress reporting - with gil: - # must reacquire the gil to print out back to python. - print(thread[0], step[0], stage, stage_len) - return 0 - -cdef int mkl_no_progress(int *thread, int* step, char* stage, int stage_len) nogil: - return 0 - -MATRIX_TYPES ={ - 'real_structurally_symmetric': 1, - 'real_symmetric_positive_definite': 2, - 'real_symmetric_indefinite': -2, - 'complex_structurally_symmetric': 3, - 'complex_hermitian_positive_definite': 4, - 'complex_hermitian_indefinite': -4, - 'complex_symmetric': 6, - 'real_nonsymmetric': 11, - 'complex_nonsymmetric': 13} -"""dict : matrix type string keys and corresponding integer value to describe -the different supported matrix types. -""" - -_err_messages = {0:"no error", - -1:'input inconsistent', - -2:'not enough memory', - -3:'reordering problem', - -4:'zero pivot, numerical factorization or iterative refinement problem', - -5:'unclassified (internal) error', - -6:'reordering failed', - -7:'diagonal matrix is singular', - -8:'32-bit integer overflow problem', - -9:'not enough memory for OOC', - -10:'error opening OOC files', - -11:'read/write error with OOC files', - -12:'pardiso_64 called from 32-bit library', - } - -class PardisoError(Exception): - pass - -class PardisoWarning(UserWarning): - pass - -class PardisoTypeConversionWarning( - PardisoWarning, sp.SparseEfficiencyWarning): - pass - -def _ensure_csr(A, sym=False): - if not sp.issparse(A): - raise(PardisoError("Matrix is not sparse.")) - if not (sp.isspmatrix_csr(A)): - if sym and sp.isspmatrix_csc(A): - A = A.T - else: - warnings.warn("Converting %s matrix to CSR format." - %A.__class__.__name__, PardisoTypeConversionWarning) - A = A.tocsr() - return A - -def get_mkl_max_threads(): - """ - Returns the current number of openMP threads available to the MKL Library - """ - return mkl_get_max_threads() - -def get_mkl_pardiso_max_threads(): - """ - Returns the current number of openMP threads available to the Pardiso functions - """ - return mkl_domain_get_max_threads(MKL_DOMAIN_PARDISO) - -def set_mkl_threads(num_threads=None): - """ - Sets the number of openMP threads available to the MKL library. - - Parameters - ---------- - num_threads : None or int - number of threads to use for the MKL library. - None will set the number of threads to that returned by `os.cpu_count()`. - """ - if num_threads is None: - num_threads = os.cpu_count() - elif num_threads<=0: - raise PardisoError('Number of threads must be greater than 0') - mkl_set_num_threads(num_threads) - -def set_mkl_pardiso_threads(num_threads=None): - """ - Sets the number of openMP threads available to the Pardiso functions - - Parameters - ---------- - num_threads : None or int - Number of threads to use for the MKL Pardiso routines. - None (or 0) will set the number of threads to `get_mkl_max_threads` - """ - if num_threads is None: - num_threads = 0 - elif num_threads<0: - raise PardisoError('Number of threads must be greater than 0') - mkl_domain_set_num_threads(num_threads, MKL_DOMAIN_PARDISO) - -def get_mkl_version(): - """ - Returns a dictionary describing the version of Intel Math Kernel Library used - """ - cdef MKLVersion vers - mkl_get_version(&vers) - return vers - -cdef class _PardisoParams: - cdef int_t iparm[64] - cdef int_t n, mtype, maxfct, mnum, msglvl - cdef int_t[:] ia, ja, perm - -cdef class _PardisoParams64: - cdef long_t iparm[64] - cdef long_t n, mtype, maxfct, mnum, msglvl - cdef long_t[:] ia, ja, perm - -ctypedef fused _par_params: - _PardisoParams - _PardisoParams64 - -cdef class MKLPardisoSolver: - cdef _MKL_DSS_HANDLE_t handle[64] - cdef _PardisoParams _par - cdef _PardisoParams64 _par64 - cdef int_t _is_32 - cdef int_t mat_type - cdef int_t _factored - cdef size_t shape[2] - cdef PyThread_type_lock lock - cdef void * a - - cdef object _data_type - cdef object _Adata # a reference to make sure the pointer "a" doesn't get destroyed - - def __cinit__(self, *args, **kwargs): - self.lock = PyThread_allocate_lock() - - for i in range(64): - self.handle[i] = NULL - - def __init__(self, A, matrix_type=None, factor=True, verbose=False): - '''ParidsoSolver(A, matrix_type=None, factor=True, verbose=False) - An interface to the intel MKL pardiso sparse matrix solver. - - This is a solver class for a scipy sparse matrix using the Pardiso sparse - solver in the Intel Math Kernel Library. - - It will factorize the sparse matrix in three steps: a symbolic - factorization stage, a numerical factorization stage, and a solve stage. - - The purpose is to construct a sparse factorization that can be repeatedly - called to solve for multiple right-hand sides. - - Parameters - ---------- - A : scipy.sparse.spmatrix - A sparse matrix preferably in a CSR format. - matrix_type : str, int, or None, optional - A string describing the matrix type, or it's corresponding int code. - If None, then assumed to be nonsymmetric matrix. - factor : bool, optional - Whether to perform the factorization stage upon instantiation of the class. - verbose : bool, optional - Enable verbose output from the pardiso solver. - - Notes - ----- - - The supported matrix types are: real symmetric positive definite, real - symmetric indefinite, real structurally symmetric, real nonsymmetric, - complex hermitian positive definite, complex hermitian indefinite, complex - symmetric, complex structurally symmetric, and complex nonsymmetric. - The solver supports both single and double precision matrices. - - Examples - -------- - - Solve a symmetric positive definite system by first forming a simple 5 point - laplacian stencil with a zero boundary condition. Then we create a known - solution vector to compare to result with. - - >>> import scipy.sparse as sp - >>> from pydiso.mkl_solver import MKLPardisoSolver - >>> nx, ny = 5, 7 - >>> Dx = sp.diags((-1, 1), (-1, 0), (nx+1, nx)) - >>> Dy = sp.diags((-1, 1), (-1, 0), (ny+1, ny)) - >>> A = sp.kron(sp.eye(nx), Dy.T @ Dy) + sp.kron(Dx.T @ Dx, sp.eye(ny)) - >>> x = np.linspace(-10, 10, nx*ny) - >>> b = A @ x - - Next we create the solver object using pardiso - - >>> Ainv = MKLPardisoSolver(A, matrix_type='real_symmetric_positive_definite') - >>> x_solved = Ainv.solve(b) - >>> np.allclose(x, x_solved) - True - ''' - n_row, n_col = A.shape - if n_row != n_col: - raise ValueError("Matrix is not square") - self.shape = n_row, n_col - - self._data_type = A.dtype - if matrix_type is None: - if np.issubdtype(self._data_type, np.complexfloating): - matrix_type = MATRIX_TYPES['complex_nonsymmetric'] - elif np.issubdtype(self._data_type, np.floating): - matrix_type = MATRIX_TYPES['real_nonsymmetric'] - else: - raise(PardisoError('Unrecognized matrix data type')) - if not(matrix_type in MATRIX_TYPES or matrix_type in MATRIX_TYPES.values()): - raise PardisoError('Unrecognized matrix type') - if matrix_type in MATRIX_TYPES: - matrix_type = MATRIX_TYPES[matrix_type] - self.mat_type = matrix_type - - if self.mat_type in [1, 2, -2, 11]: - if not np.issubdtype(self._data_type, np.floating): - raise TypeError( - "matrix dtype and matrix_type not consistent, expected a real matrix" - ) - else: - if not np.issubdtype(self._data_type, np.complexfloating): - raise TypeError( - "matrix dtype and matrix_type not consistent, expected a complex matrix" - ) - - if self.mat_type in [-2, 2, -4, 4, 6]: - A = sp.triu(A, format='csr') - A = _ensure_csr(A) - A.sort_indices() - - #set integer length - integer_len = A.indices.itemsize - #self._is_32 = integer_len == sizeof(int_t) - self._is_32 = sizeof(int_t) == 8 or integer_len == sizeof(int_t) - - print("Am I calling _PardisoParams?") - print(self._is_32) - - print("What is the matrix's indices itemsize?") - print(integer_len) - - print("What is the matrix's indptr itemsize?") - print(A.indptr.itemsize) - - print("What is the size of MKL_INT") - print(sizeof(int_t)) - - if self._is_32: - self._par = _PardisoParams() - self._initialize(self._par, A, matrix_type, verbose) - else: - self._par64 = _PardisoParams64() - self._initialize(self._par64, A, matrix_type, verbose) - - if verbose: - #for reporting factorization progress via python's `print` - mkl_set_progress(mkl_progress) - else: - mkl_set_progress(mkl_no_progress) - - self._set_A(A.data) - self._analyze() - self._factored = False - if factor: - self._factor() - - def refactor(self, A): - """solver.refactor(A) - re-use a symbolic factorization with a new `A` matrix. - - Note - ---- - Must have the same non-zero pattern as the initial `A` matrix. - If `full_refactor=False`, the initial factorization is used as a - preconditioner to a Krylov subspace solver in the solve step. - - Parameters - ---------- - A : scipy.sparse.spmatrix - A sparse matrix preferably in a CSR format. - """ - #Assumes that the matrix A has the same non-zero pattern and ordering - #as the initial A matrix - if self.mat_type in [-2, 2, -4, 4, 6]: - A = sp.triu(A, format='csr') - A = _ensure_csr(A) - A.sort_indices() - - self._set_A(A.data) - self._factor() - - cdef _initialized(self): - # If any of the handle pointers are not null, return 1 - cdef int i - for i in range(64): - if self.handle[i]: - return 1 - return 0 - - def __call__(self, b): - return self.solve(b) - - def solve(self, b, x=None, transpose=False): - """solve(self, b, x=None, transpose=False) - Solves the equation AX=B using the factored A matrix - - Note - ---- - The data will be copied if not contiguous in all cases. If multiple rhs - are given, the input arrays will be copied if not in a contiguous - Fortran order. - - Parameters - ---------- - b : numpy.ndarray - array of shape 1D or 2D for the right hand side of the equation - (of the same data type as A). - x : numpy.ndarray, optional - A pre-allocated output array (of the same data type as A). - If None, a new array is constructed. - transpose : bool, optional - If True, it will solve A^TX=B using the factored A matrix. - - Returns - ------- - numpy array - array containing the solution (in Fortran ordering) - """ - if b.dtype != self._data_type: - warnings.warn("rhs does not have the same data type as A", - PardisoTypeConversionWarning) - b = b.astype(self._data_type) - b = np.atleast_1d(b) - if b.shape[0] != self.shape[0]: - raise ValueError(f"incorrect length of b, expected {self.shape[0]}, got {b.shape[0]}") - b = np.require(b, requirements='F') - - if x is None: - x = np.empty_like(b) - if(x.dtype!=self._data_type): - warnings.warn("output does not have the same data type as A", - PardisoTypeConversionWarning) - x = x.astype(self._data_type) - x = np.atleast_1d(x) - if x.shape[0] != self.shape[0]: - raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}") - x = np.require(x, requirements='F') - - print("Validated input arrays") - - Py_INCREF(b) - cdef void * bp = np.PyArray_DATA(b) - Py_INCREF(x) - cdef void * xp = np.PyArray_DATA(x) - - print("assigned x and b pointers") - - if bp == xp: - Py_DECREF(b) - Py_DECREF(x) - raise PardisoError("b and x must be different arrays") - - cdef int_t nrhs = b.shape[1] if b.ndim == 2 else 1 - - if transpose: - self.set_iparm(11, 2) - else: - self.set_iparm(11, 0) - - print("updated iparm 11") - try: - self._solve(bp, xp, nrhs) - except Exception as err: - raise err - finally: - Py_DECREF(b) - Py_DECREF(x) - return x - - @property - def perm(self): - """ Fill-reducing permutation vector used inside pardiso. - """ - if self._is_32: - return np.array(self._par.perm) - else: - return np.array(self._par64.perm) - - @property - def iparm(self): - """ Parameter options for the pardiso solver. - """ - if self._is_32: - return np.array(self._par.iparm) - else: - return np.array(self._par64.iparm) - - def set_iparm(self, int_t i, int_t val): - if i > 63 or i < 0: - raise IndexError(f"index {i} is out of bounds for size 64 array") - if i not in [ - 1, 3, 4, 5, 7, 9, 10, 11, 12, 17, 18, 20, 23, - 24, 26, 30, 33, 34, 35, 36, 38, 42, 55, 59 - ]: - raise PardisoError(f"cannot set parameter {i} of the iparm array") - if self._is_32: - self._par.iparm[i] = val - else: - self._par64.iparm[i] = val - - @property - def nnz(self): - return self.iparm[17] - - cdef _initialize(self, _par_params par, A, matrix_type, verbose): - - if _par_params is _PardisoParams: - int_dtype = f'i{sizeof(int_t)}' - else: - int_dtype = 'i8' - par.n = A.shape[0] - print("Initializing permutation array with ", int_dtype, "dtype") - par.perm = np.empty(par.n, dtype=int_dtype) - - par.maxfct = 1 - par.mnum = 1 - - par.mtype = matrix_type - par.msglvl = verbose - - for i in range(64): - par.iparm[i] = 0 # ensure these all start at 0 - - # set default parameters - par.iparm[0] = 1 # tell pardiso to not reset these values on the first call - par.iparm[1] = 2 # The nested dissection algorithm from the METIS - par.iparm[3] = 0 # The factorization is always computed as required by phase. - par.iparm[4] = 2 # fill perm with computed permutation vector - par.iparm[5] = 0 # The array x contains the solution; right-hand side vector b is kept unchanged. - par.iparm[7] = 0 # The solver automatically performs two steps of iterative refinement when perterbed pivots are obtained - par.iparm[9] = 13 if matrix_type in [11, 13] else 8 - par.iparm[10] = 1 if matrix_type in [11, 13] else 0 - par.iparm[11] = 0 # Solve a linear system AX = B (as opposed to A.T or A.H) - par.iparm[12] = 1 if matrix_type in [11, 13] else 0 - par.iparm[17] = -1 # Return the number of non-zeros in this value after first call - par.iparm[18] = 0 # do not report flop count - par.iparm[20] = 1 if matrix_type in [-2, -4, 6] else 0 - par.iparm[23] = 0 # classic (not parallel) factorization - par.iparm[24] = 0 # default behavoir of parallel solving - par.iparm[26] = 0 # Do not check the input matrix - #set precision - if self._data_type==np.float64 or self._data_type==np.complex128: - par.iparm[27] = 0 - elif self._data_type==np.float32 or self._data_type==np.complex64: - par.iparm[27] = 1 - else: - raise TypeError("Unsupported data type") - par.iparm[30] = 0 # this would be used to enable sparse input/output for solves - par.iparm[33] = 0 # optimal number of thread for CNR mode - par.iparm[34] = 1 # zero based indexing - par.iparm[35] = 0 # Do not compute schur complement - par.iparm[36] = 0 # use CSR storage format - par.iparm[38] = 0 # Do not use low rank update - par.iparm[42] = 0 # Do not compute the diagonal of the inverse - par.iparm[55] = 0 # Internal function used to work with pivot and calculation of diagonal arrays turned off. - par.iparm[59] = 0 # operate in-core mode - - par.ia = np.require(A.indptr, dtype=int_dtype, requirements='C') - par.ja = np.require(A.indices, dtype=int_dtype, requirements='C') - - cdef _set_A(self, data): - self._Adata = np.require(data, requirements='C') - self.a = np.PyArray_DATA(data) - - def __dealloc__(self): - # Need to call pardiso with phase=-1 to release memory - cdef int_t phase=-1, nrhs=0, error=0 - cdef long_t phase64=-1, nrhs64=0, error64=0 - - print("Deallocating myself") - - if self._initialized(): - print("I was initialized, so call pardiso to release the memory.") - with nogil: - PyThread_acquire_lock(self.lock, 1) - if self._is_32: - pardiso( - self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype, - &phase, &self._par.n, NULL, NULL, NULL, NULL, &nrhs, self._par.iparm, - &self._par.msglvl, NULL, NULL, &error - ) - else: - pardiso_64( - self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype, - &phase64, &self._par64.n, NULL, NULL, NULL, NULL, &nrhs64, - self._par64.iparm, &self._par64.msglvl, NULL, NULL, &error64 - ) - PyThread_release_lock(self.lock) - err = error or error64 - if err!=0: - raise PardisoError("Memory release error "+_err_messages[err]) - for i in range(64): - self.handle[i] = NULL - - if self.lock: - print("I had a lock") - #dealloc lock - PyThread_free_lock(self.lock) - self.lock = NULL - print("lock is no more lock") - - print("Deallocated myself") - - cdef _analyze(self): - #phase = 11 - with nogil: - err = self._run_pardiso(11) - if err!=0: - raise PardisoError("Analysis step error, "+_err_messages[err]) - - cdef _factor(self): - #phase = 22 - self._factored = False - - with nogil: - err = self._run_pardiso(22) - - if err!=0: - raise PardisoError("Factor step error, "+_err_messages[err]) - - self._factored = True - - cdef _solve(self, void* b, void* x, int_t nrhs_in): - #phase = 33 - if(not self._factored): - raise PardisoError("Cannot solve without a previous factorization.") - - print("trying to solve") - - with nogil: - err = self._run_pardiso(33, b, x, nrhs_in) - - if err!=0: - raise PardisoError("Solve step error, "+_err_messages[err]) - - @cython.boundscheck(False) - cdef int _run_pardiso(self, int_t phase, void* b=NULL, void* x=NULL, int_t nrhs=0) noexcept nogil: - cdef int_t error=0 - cdef long_t error64=0, phase64=phase, nrhs64=nrhs - - PyThread_acquire_lock(self.lock, 1) - if self._is_32: - with gil: - print("Calling pardiso") - - print("maxfct", self._par.maxfct) - print("mnum", self._par.mnum) - print("mtype", self._par.mtype) - print("phase", phase) - print("n", self._par.n) - print("nrhs", nrhs) - print("msglvl", self._par.msglvl) - print("ia") - n = self._par.n - for i in range(n+1): - print(self._par.ia[i]) - nnz = self._par.ia[n] - print("ja") - for i in range(nnz): - print(self._par.ja[i]) - - print("a") - for i in range(nnz): - print(self._Adata[i]) - if x: - print("x") - for i in range(n): - x[i] - if b: - print("b") - for i in range(n): - b[i] - - sys.stdout.flush() - - pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype, - &phase, &self._par.n, self.a, &self._par.ia[0], &self._par.ja[0], - &self._par.perm[0], &nrhs, self._par.iparm, &self._par.msglvl, b, x, &error) - - with gil: - print("Called pardiso, error was ", error) - sys.stdout.flush() - else: - pardiso_64(self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype, - &phase64, &self._par64.n, self.a, &self._par64.ia[0], &self._par64.ja[0], - &self._par64.perm[0], &nrhs64, self._par64.iparm, &self._par64.msglvl, b, x, &error64) - PyThread_release_lock(self.lock) - error = error or error64 - return error \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e0d3b8b..f06abe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ name = 'pydiso' dynamic = ["version"] description = "Wrapper for intel's pardiso implementation in the MKL" readme = 'README.md' -requires-python = '>=3.8' +requires-python = '>=3.10' authors = [ {name = 'SimPEG developers', email = 'josephrcapriotti@gmail.com'}, ] diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py index 5f65254..ebf3961 100644 --- a/tests/test_pydiso.py +++ b/tests/test_pydiso.py @@ -25,8 +25,8 @@ Uc = sp.diags([(2+2j), -(1+1j)], [0, 1], (n, n)) U2c = sp.diags([(2+2j), -(1+1j)], [0, 2], (n, n)) -xr = np.random.rand(n) -xc = np.random.rand(n) + np.random.rand(n)*1j +xr = np.linspace(-10, 10, n) +xc = np.linspace(-20, 20, n) + np.linspace(20, -20, n)*1j A_real_dict = {'real_structurally_symmetric': L@U, 'real_symmetric_positive_definite': L@L.T, @@ -35,8 +35,8 @@ } A_complex_dict = {'complex_structurally_symmetric': Lc@Uc, 'complex_hermitian_positive_definite': Lc@Lc.T.conjugate(), - 'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(), - 'complex_symmetric': Lc@Lc.T, + #'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(), + #'complex_symmetric': Lc@Lc.T, 'complex_nonsymmetric': Lc@U2c } @@ -86,7 +86,6 @@ def test_version(): @pytest.mark.parametrize("A, matrix_type", inputs) def test_solver(A, matrix_type): dtype = A.dtype - print(dtype) if np.issubdtype(dtype, np.complexfloating): x = xc.astype(dtype) else: @@ -94,11 +93,11 @@ def test_solver(A, matrix_type): b = A@x solver = Solver(A, matrix_type=matrix_type) - sys.stdout.flush() + A_valid = sp.csr_matrix((solver._data, solver._indices, solver._indptr)) x2 = solver.solve(b) eps = np.finfo(dtype).eps - np.testing.assert_allclose(x, x2, atol=1E3*eps) + np.testing.assert_allclose(x, x2, atol=2E3*eps) @pytest.mark.parametrize("A, matrix_type", inputs) def test_transpose_solver(A, matrix_type): @@ -113,7 +112,7 @@ def test_transpose_solver(A, matrix_type): x2 = solver.solve(b, transpose=True) eps = np.finfo(dtype).eps - np.testing.assert_allclose(x, x2, atol=1E3*eps) + np.testing.assert_allclose(x, x2, atol=2E3*eps) def test_multiple_RHS(): A = A_real_dict["real_symmetric_positive_definite"] @@ -124,16 +123,16 @@ def test_multiple_RHS(): x2 = solver.solve(b) eps = np.finfo(np.float64).eps - np.testing.assert_allclose(x, x2, atol=1E3*eps) + np.testing.assert_allclose(x, x2, atol=2E3*eps) def test_matrix_type_errors(): A = A_real_dict["real_symmetric_positive_definite"] - with pytest.raises(TypeError): + with pytest.raises(ValueError): solver = Solver(A, matrix_type="complex_hermitian_positive_definite") A = A_complex_dict["complex_structurally_symmetric"] - with pytest.raises(TypeError): + with pytest.raises(ValueError): solver = Solver(A, matrix_type="real_symmetric_positive_definite") From 8dc1d4eca4e0c263c3f1d96ed147a4f32e986c8e Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Tue, 24 Sep 2024 09:30:47 -0600 Subject: [PATCH 23/25] remove a few call signatures from doc strings --- pydiso/mkl_solver.py | 57 +++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/pydiso/mkl_solver.py b/pydiso/mkl_solver.py index a4cf6a7..38113be 100644 --- a/pydiso/mkl_solver.py +++ b/pydiso/mkl_solver.py @@ -36,23 +36,10 @@ class PardisoTypeConversionWarning( PardisoWarning, sp.SparseEfficiencyWarning): pass - -def _ensure_csr(A, sym=False): - if not (sp.isspmatrix_csr(A)): - if sym and sp.isspmatrix_csc(A): - A = A.T - else: - warnings.warn("Converting %s matrix to CSR format." - %A.__class__.__name__, PardisoTypeConversionWarning) - A = A.tocsr() - return A - - class MKLPardisoSolver: def __init__(self, A, matrix_type=None, factor=True, verbose=False): - '''ParidsoSolver(A, matrix_type=None, factor=True, verbose=False) - An interface to the intel MKL pardiso sparse matrix solver. + '''An interface to the Intel MKL pardiso sparse matrix solver. This is a solver class for a scipy sparse matrix using the Pardiso sparse solver in the Intel Math Kernel Library. @@ -68,7 +55,7 @@ def __init__(self, A, matrix_type=None, factor=True, verbose=False): A : scipy.sparse.spmatrix A sparse matrix preferably in a CSR format. matrix_type : str, int, or None, optional - A string describing the matrix type, or it's corresponding int code. + A string describing the matrix type, or its corresponding integer code. If None, then assumed to be nonsymmetric matrix. factor : bool, optional Whether to perform the factorization stage upon instantiation of the class. @@ -157,7 +144,7 @@ def __init__(self, A, matrix_type=None, factor=True, verbose=False): self.matrix_type = matrix_type indptr = np.asarray(A.indptr) # double check it's a numpy array - mkl_int_size= get_mkl_int_size() + mkl_int_size = get_mkl_int_size() mkl_int64_size = get_mkl_int64_size() target_int_size = mkl_int_size if indptr.itemsize <= mkl_int_size else mkl_int64_size @@ -180,14 +167,11 @@ def __init__(self, A, matrix_type=None, factor=True, verbose=False): self._factor() def refactor(self, A): - """solver.refactor(A) - re-use a symbolic factorization with a new `A` matrix. + """Reuse a symbolic factorization with a new matrix. Note ---- Must have the same non-zero pattern as the initial `A` matrix. - If `full_refactor=False`, the initial factorization is used as a - preconditioner to a Krylov subspace solver in the solve step. Parameters ---------- @@ -212,14 +196,7 @@ def __call__(self, b): return self.solve(b) def solve(self, b, x=None, transpose=False): - """solve(self, b, x=None, transpose=False) - Solves the equation AX=B using the factored A matrix - - Note - ---- - The data will be copied if not contiguous in all cases. If multiple rhs - are given, the input arrays will be copied if not in a contiguous - Fortran order. + """Solves the equation AX=B using the factored A matrix Parameters ---------- @@ -234,12 +211,15 @@ def solve(self, b, x=None, transpose=False): Returns ------- - numpy array + numpy.ndarray array containing the solution (in Fortran ordering) - """ - if(not self._factored): - raise PardisoError("Cannot solve without a previous factorization.") + Notes + ----- + The data will be copied if not contiguous in all cases. If multiple rhs + are given, the input arrays will be copied if not in a contiguous + Fortran order. + """ if b.dtype != self._data_dtype: warnings.warn("rhs does not have the same data type as A", PardisoTypeConversionWarning) @@ -279,6 +259,9 @@ def solve(self, b, x=None, transpose=False): if x is b or (x.base is not None and (x.base is b.base)): raise ValueError("x and b cannot point to the same memory") + if not self._factored: + self._factor() + self._handle.set_iparm(11, 2 if transpose else 0) phase = 33 @@ -308,7 +291,11 @@ def _validate_matrix(self, mat): if sp.isspmatrix_csc(mat): mat = mat.T # Transpose to get a CSR matrix since it's symmetric mat = sp.triu(mat, format='csr') - mat = _ensure_csr(mat) + + if not (sp.isspmatrix_csr(mat)): + warnings.warn("Converting %s matrix to CSR format." + % mat.__class__.__name__, PardisoTypeConversionWarning) + mat = mat.tocsr() mat.sort_indices() mat.sum_duplicates() @@ -335,7 +322,7 @@ def nnz(self): def _analyze(self): phase = 11 - xb_dummy = np.empty([1,1], dtype=self._data_dtype) + xb_dummy = np.empty([1, 1], dtype=self._data_dtype) error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy) if error: raise PardisoError("Analysis step error, "+_err_messages[error]) @@ -343,7 +330,7 @@ def _analyze(self): def _factor(self): phase = 22 self._factored = False - xb_dummy = np.empty([1,1], dtype=self._data_dtype) + xb_dummy = np.empty([1, 1], dtype=self._data_dtype) error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy) if error: From a4eb41a483d248e078c8a13a7470e68245a9fc30 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Tue, 24 Sep 2024 09:36:46 -0600 Subject: [PATCH 24/25] add complex tests back in --- tests/test_pydiso.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py index ebf3961..624d50f 100644 --- a/tests/test_pydiso.py +++ b/tests/test_pydiso.py @@ -35,8 +35,8 @@ } A_complex_dict = {'complex_structurally_symmetric': Lc@Uc, 'complex_hermitian_positive_definite': Lc@Lc.T.conjugate(), - #'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(), - #'complex_symmetric': Lc@Lc.T, + 'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(), + 'complex_symmetric': Lc@Lc.T, 'complex_nonsymmetric': Lc@U2c } From e017d0e504a3bc46dbb32491b1025afcfb215395 Mon Sep 17 00:00:00 2001 From: Joseph Capriotti Date: Tue, 24 Sep 2024 09:44:13 -0600 Subject: [PATCH 25/25] update distribute workflow --- .github/workflows/python-package-conda.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index bfe46df..406d643 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -70,7 +70,6 @@ jobs: run: | ${{ matrix.coverage && 'coverage run -m' || '' }} pytest -s -v ${{ matrix.coverage && 'coverage xml' || '' }} -# continue-on-error: ${{ matrix.os == 'windows-latest' && matrix.mkl-version == '2024'}} - name: Upload coverage if: ${{ matrix.coverage }} @@ -93,9 +92,8 @@ jobs: - name: Setup Conda uses: conda-incubator/setup-miniconda@v3 with: - python-version: 3.11 - mamba-version: '*' - channels: conda-forge, defaults + python-version: "3.11" + channels: defaults channel-priority: true activate-environment: dev