From dcfe5b25799289f16aa438fc8d6d96510d5e10c5 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Mon, 9 Sep 2024 16:11:24 -0600
Subject: [PATCH 01/25] only test on 3.10+

---
 .github/workflows/python-package-conda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index e276185..aaa5e0a 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -23,7 +23,7 @@ jobs:
       fail-fast: False
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
         mkl-version: ['2023']  # currently 2024 fails building for some reason...
         include:
           - os: ubuntu-latest

From 3224d4e0178a0303604ef43f6adec36ee9ab277b Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Mon, 9 Sep 2024 16:11:39 -0600
Subject: [PATCH 02/25] rely on b_coverage flag

---
 meson.options      | 2 --
 pydiso/meson.build | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/meson.options b/meson.options
index f3968b5..6440a29 100644
--- a/meson.options
+++ b/meson.options
@@ -1,5 +1,3 @@
-option('cy_coverage', type : 'boolean', value : false)
-
 option('use-sdl', type: 'boolean', value: true,
        description: 'Use the single dynamic library.')
 
diff --git a/pydiso/meson.build b/pydiso/meson.build
index fd7f01c..4371952 100644
--- a/pydiso/meson.build
+++ b/pydiso/meson.build
@@ -95,7 +95,7 @@ cython_c_args = [numpy_nodepr_api, use_math_defines]\
 cython_file = 'mkl_solver.pyx'
 cython_file_full_path = meson.current_source_dir() / cython_file
 
-if get_option('cy_coverage')
+if get_option('b_coverage')
   # tell cython to enable linetracing
   add_project_arguments(['--directive', 'linetrace=true'], language : 'cython')
   # tell the c_compiler to definie the CYTHON_TRACE_NOGIL

From 6073d53cb48b1e0208eac31bf797772f4cfebe9f Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Mon, 9 Sep 2024 16:43:41 -0600
Subject: [PATCH 03/25] updates for numpy 2.0.0

---
 .github/workflows/python-package-conda.yml |  6 ++--
 meson.build                                |  2 +-
 pydiso/meson.build                         | 42 ++--------------------
 pyproject.toml                             | 27 ++------------
 4 files changed, 10 insertions(+), 67 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index aaa5e0a..e3b8438 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -55,7 +55,7 @@ jobs:
 
     - name: Create environment
       run: |
-        mamba install --quiet --yes pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \
+        conda install --quiet --yes pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \
           mkl-devel pkg-config meson-python meson ninja setuptools_scm \
           ${{ matrix.coverage && 'coverage' || ''}}
 
@@ -63,7 +63,7 @@ jobs:
       run: |
         python -m pip install --no-build-isolation --verbose --editable . \
           --config-setting=compile-args=-v \
-          ${{ matrix.coverage && '--config-settings=setup-args="-Dcy_coverage=true"' || ''}}
+          ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}}
         conda list
 
     - name: Run Tests
@@ -107,7 +107,7 @@ jobs:
 
     - name: Create environment
       run: |
-        mamba install --quiet --yes pip numpy scipy cython mkl=2023 \
+        conda install --quiet --yes pip numpy scipy cython mkl=2023 \
           mkl-devel pkg-config meson-python meson ninja setuptools_scm \
           python-build
 
diff --git a/meson.build b/meson.build
index 3303d66..29b269a 100644
--- a/meson.build
+++ b/meson.build
@@ -14,7 +14,7 @@ print(get_version())'''
 ).stdout().strip(),
 
   license: 'MIT',
-  meson_version: '>= 1.1.0',
+  meson_version: '>= 1.4.0',
   default_options: [
     'buildtype=debugoptimized',
     'b_ndebug=if-release',
diff --git a/pydiso/meson.build b/pydiso/meson.build
index 4371952..67c9464 100644
--- a/pydiso/meson.build
+++ b/pydiso/meson.build
@@ -1,38 +1,5 @@
 # NumPy include directory
-# The try-except is needed because when things are
-# split across drives on Windows, there is no relative path and an exception
-# gets raised. There may be other such cases, so add a catch-all and switch to
-# an absolute path. Relative paths are needed when for example a virtualenv is
-# placed inside the source tree; Meson rejects absolute paths to places inside
-# the source tree.
-# For cross-compilation it is often not possible to run the Python interpreter
-# in order to retrieve numpy's include directory. It can be specified in the
-# cross file instead:
-#   [properties]
-#   numpy-include-dir = /abspath/to/host-pythons/site-packages/numpy/core/include
-#
-# This uses the path as is, and avoids running the interpreter.
-incdir_numpy = meson.get_external_property('numpy-include-dir', 'not-given')
-if incdir_numpy == 'not-given'
-  incdir_numpy = run_command(py,
-    [
-      '-c',
-      '''import os
-import numpy as np
-try:
-  incdir = os.path.relpath(np.get_include())
-except Exception:
-  incdir = np.get_include()
-print(incdir)
-  '''
-    ],
-    check: true
-  ).stdout().strip()
-else
-  _incdir_numpy_abs = incdir_numpy
-endif
-inc_np = include_directories(incdir_numpy)
-np_dep = declare_dependency(include_directories: inc_np)
+np_dep = dependency('numpy')
 
 # MKL-specific options
 mkl_dep_name = 'mkl-dynamic'
@@ -87,13 +54,10 @@ else
   use_math_defines = []
 endif
 
-numpy_nodepr_api = '-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION'
 c_undefined_ok = ['-Wno-maybe-uninitialized']
-
-cython_c_args = [numpy_nodepr_api, use_math_defines]\
+cython_c_args = [numpy_nodepr_api, use_math_defines, '-DCYTHON_CCOMPLEX=0']
 
 cython_file = 'mkl_solver.pyx'
-cython_file_full_path = meson.current_source_dir() / cython_file
 
 if get_option('b_coverage')
   # tell cython to enable linetracing
@@ -103,6 +67,7 @@ if get_option('b_coverage')
 
   # compile the .c file from the .pyx file in it's directory.
   # These should include the default options passed to the cython compiler
+  cython_file_full_path = meson.current_source_dir() / cython_file
   run_command(cython, '-M', '--fast-fail', '-3', '--directive', 'linetrace=true', cython_file_full_path)
 endif
 
@@ -111,7 +76,6 @@ module_path = 'pydiso'
 py.extension_module(
     'mkl_solver',
     cython_file,
-    include_directories: incdir_numpy,
     c_args: cython_c_args,
     install: true,
     subdir: module_path,
diff --git a/pyproject.toml b/pyproject.toml
index 0178128..e0d3b8b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,31 +2,10 @@
 [build-system]
 build-backend = 'mesonpy'
 requires = [
-    "meson-python>=0.14.0",
-    "Cython>=0.29.35",  # when updating version, also update check in meson.build
+    "meson-python>=0.15.0",
+    "Cython>=3.0.8",
     "setuptools_scm[toml]>=6.2",
-
-    # This package automatically provides all of the numpy pinning for different python
-    # versions and runtime requirements.
-    "oldest-supported-numpy",
-
-    # The following is taken from scipy's pyproject.toml file to handle
-    # building against the proper numpy API
-
-    # When numpy 2.0.0rc1 comes out, we should update this to build against 2.0,
-    # and then runtime depend on the range 1.22.X to <2.3. No need to switch to
-    # 1.25.2 in the meantime (1.25.x is the first version which exports older C
-    # API versions by default).
-
-    # default numpy requirements
-    # "numpy==1.22.4; python_version<='3.10' and platform_python_implementation != 'PyPy'",
-    # "numpy==1.23.2; python_version=='3.11' and platform_python_implementation != 'PyPy'",
-
-    # For Python versions which aren't yet officially supported, we specify an
-    # unpinned NumPy which allows source distributions to be used and allows
-    # wheels to be used as soon as they become available.
-    # "numpy>=1.26.0b1; python_version>='3.12'",
-    # "numpy; python_version>='3.8' and platform_python_implementation=='PyPy'",
+    "numpy>=2.0.0rc1",
 ]
 
 [project]

From 117177bfe1361c47a9b76bd4503f9afcc1f4fe93 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Mon, 9 Sep 2024 16:48:13 -0600
Subject: [PATCH 04/25] test on latest-large

---
 .github/workflows/python-package-conda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index e3b8438..2cc1305 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       fail-fast: False
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest-large, windows-latest]
         python-version: ["3.10", "3.11", "3.12"]
         mkl-version: ['2023']  # currently 2024 fails building for some reason...
         include:

From 4d7f74f8bfe0304dba1e15e8f360f02f2a855f0d Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Mon, 9 Sep 2024 16:51:26 -0600
Subject: [PATCH 05/25] add numpy dep warning back

---
 .github/workflows/python-package-conda.yml | 2 +-
 pydiso/meson.build                         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 2cc1305..8e1e6d8 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       fail-fast: False
       matrix:
-        os: [ubuntu-latest, macos-latest-large, windows-latest]
+        os: [ubuntu-latest, macos-12, windows-latest]
         python-version: ["3.10", "3.11", "3.12"]
         mkl-version: ['2023']  # currently 2024 fails building for some reason...
         include:
diff --git a/pydiso/meson.build b/pydiso/meson.build
index 67c9464..619a5dc 100644
--- a/pydiso/meson.build
+++ b/pydiso/meson.build
@@ -1,5 +1,6 @@
 # NumPy include directory
 np_dep = dependency('numpy')
+numpy_nodepr_api = ['-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION']
 
 # MKL-specific options
 mkl_dep_name = 'mkl-dynamic'

From b655371ef8ca15e9e63042e5b5b84543d038a19a Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Mon, 9 Sep 2024 17:02:04 -0600
Subject: [PATCH 06/25] update test

---
 tests/test_pydiso.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py
index d5c715a..d4caf63 100644
--- a/tests/test_pydiso.py
+++ b/tests/test_pydiso.py
@@ -34,8 +34,8 @@
                'real_nonsymmetric': L@U2
                }
 A_complex_dict = {'complex_structurally_symmetric': Lc@Uc,
-                  'complex_hermitian_positive_definite': Lc@Lc.H,
-                  'complex_hermitian_indefinite': Lc@D@Lc.H,
+                  'complex_hermitian_positive_definite': Lc@Lc.T.conjugate(),
+                  'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(),
                   'complex_symmetric': Lc@Lc.T,
                   'complex_nonsymmetric': Lc@U2c
                   }

From c8a681e2dd08f69fa5e3dbc201d2ee11298d2841 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Mon, 9 Sep 2024 23:44:29 -0600
Subject: [PATCH 07/25] test mkl 2024 (exclude on high macos versions

---
 .github/workflows/python-package-conda.yml | 11 ++++-------
 pydiso/mkl_solver.pyx                      | 10 +++++-----
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 8e1e6d8..5913a53 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -24,17 +24,14 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-12, windows-latest]
         python-version: ["3.10", "3.11", "3.12"]
-        mkl-version: ['2023']  # currently 2024 fails building for some reason...
+        mkl-version: ['2023', '2024']
         include:
           - os: ubuntu-latest
             python-version: "3.12"
             coverage: ${{ true }}
-          - os: ubuntu-latest
-            python-version: "3.12"
-            mkl-version: '2024'
-          - os: windows-latest
-            python-version: "3.12"
-            mkl-version: '2024'
+        exclude:
+          - os: macos-12
+            mkl-version: "2024"
 
     steps:
     - uses: actions/checkout@v4
diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index bc82dd3..dc34bf4 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -1,7 +1,6 @@
 #cython: language_level=3
-cimport numpy as np
+cimport numpy as cnp
 import cython
-from cython cimport numeric
 from cpython.pythread cimport (
     PyThread_type_lock,
     PyThread_allocate_lock,
@@ -403,8 +402,8 @@ cdef class MKLPardisoSolver:
             raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}")
         x = np.require(x, requirements='F')
 
-        cdef void * bp = np.PyArray_DATA(b)
-        cdef void * xp = np.PyArray_DATA(x)
+        cdef void * bp = cnp.PyArray_DATA(b)
+        cdef void * xp = cnp.PyArray_DATA(x)
 
         if bp == xp:
             raise PardisoError("b and x must be different arrays")
@@ -510,7 +509,7 @@ cdef class MKLPardisoSolver:
 
     cdef _set_A(self, data):
         self._Adata = data
-        self.a = np.PyArray_DATA(data)
+        self.a = cnp.PyArray_DATA(data)
 
     def __dealloc__(self):
         # Need to call pardiso with phase=-1 to release memory
@@ -542,6 +541,7 @@ cdef class MKLPardisoSolver:
         if self.lock:
             #dealloc lock
             PyThread_free_lock(self.lock)
+            self.lock = NULL
 
     cdef _analyze(self):
         #phase = 11

From 9914874a316492f1b70187df9862209f19c8ef14 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Thu, 12 Sep 2024 08:49:31 -0600
Subject: [PATCH 08/25] write out the dtype, temporarily remove a test

---
 tests/test_pydiso.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py
index d4caf63..e319272 100644
--- a/tests/test_pydiso.py
+++ b/tests/test_pydiso.py
@@ -30,7 +30,7 @@
 
 A_real_dict = {'real_structurally_symmetric': L@U,
                'real_symmetric_positive_definite': L@L.T,
-               'real_symmetric_indefinite': L@D@L.T,
+               #'real_symmetric_indefinite': L@D@L.T,
                'real_nonsymmetric': L@U2
                }
 A_complex_dict = {'complex_structurally_symmetric': Lc@Uc,
@@ -86,6 +86,7 @@ def test_version():
 @pytest.mark.parametrize("A, matrix_type", inputs)
 def test_solver(A, matrix_type):
     dtype = A.dtype
+    print(dtype)
     if np.issubdtype(dtype, np.complexfloating):
         x = xc.astype(dtype)
     else:

From 537c209648362924fc21f2a12f499cf4f83d06d2 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Thu, 12 Sep 2024 13:13:12 -0600
Subject: [PATCH 09/25] use vsenv to build on windows

---
 .github/workflows/python-package-conda.yml | 3 ++-
 pydiso/mkl_solver.pyx                      | 1 -
 tests/test_pydiso.py                       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 5913a53..ebac542 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -60,7 +60,8 @@ jobs:
       run: |
         python -m pip install --no-build-isolation --verbose --editable . \
           --config-setting=compile-args=-v \
-          ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}}
+          ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}} \
+          ${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="--vsenv"' || ''}}
         conda list
 
     - name: Run Tests
diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index dc34bf4..802e4fc 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -16,7 +16,6 @@ import os
 
 cdef extern from 'mkl.h':
     ctypedef long long MKL_INT64
-    ctypedef unsigned long long MKL_UINT64
     ctypedef int MKL_INT
 
 ctypedef MKL_INT int_t
diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py
index e319272..addd733 100644
--- a/tests/test_pydiso.py
+++ b/tests/test_pydiso.py
@@ -30,7 +30,7 @@
 
 A_real_dict = {'real_structurally_symmetric': L@U,
                'real_symmetric_positive_definite': L@L.T,
-               #'real_symmetric_indefinite': L@D@L.T,
+               'real_symmetric_indefinite': L@D@L.T,
                'real_nonsymmetric': L@U2
                }
 A_complex_dict = {'complex_structurally_symmetric': Lc@Uc,

From 3a5e30af1f10f9157d25d9b21d58788cb88fad3d Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 14:15:18 -0600
Subject: [PATCH 10/25] add some debug statements

---
 pydiso/mkl_solver.pyx | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index 802e4fc..f4e5021 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -301,6 +301,18 @@ cdef class MKLPardisoSolver:
         #self._is_32 = integer_len == sizeof(int_t)
         self._is_32 = sizeof(int_t) == 8 or integer_len == sizeof(int_t)
 
+        print("Am I calling _PardisoParams?")
+        print(self._is_32)
+
+        print("What is the matrix's indices itemsize?")
+        print(integer_len)
+
+        print("What is the matrix's indptr itemsize?")
+        print(A.indptr.itemsize)
+
+        print("What is the size of MKL_INT")
+        print(sizeof(int_t))
+
         if self._is_32:
             self._par = _PardisoParams()
             self._initialize(self._par, A, matrix_type, verbose)
@@ -458,6 +470,7 @@ cdef class MKLPardisoSolver:
         else:
             int_dtype = 'i8'
         par.n = A.shape[0]
+        print("Initializing permutation array with ", int_dtype, "dtype")
         par.perm = np.empty(par.n, dtype=int_dtype)
 
         par.maxfct = 1

From 85a914b99a98d254130013f9981258b5902be768 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 14:44:58 -0600
Subject: [PATCH 11/25] some more debug

---
 pydiso/mkl_solver.pyx | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index f4e5021..ddacbdb 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -358,6 +358,7 @@ cdef class MKLPardisoSolver:
         self._factor()
 
     cdef _initialized(self):
+        # If any of the handle pointers are not null, return 1
         cdef int i
         for i in range(64):
             if self.handle[i]:
@@ -528,7 +529,10 @@ cdef class MKLPardisoSolver:
         cdef int_t phase=-1, nrhs=0, error=0
         cdef long_t phase64=-1, nrhs64=0, error64=0
 
+        print("Deallocating myself")
+
         if self._initialized():
+            print("I was initialized, so call pardiso to release the memory.")
             with nogil:
                 PyThread_acquire_lock(self.lock, 1)
                 if self._is_32:
@@ -551,9 +555,13 @@ cdef class MKLPardisoSolver:
                 self.handle[i] = NULL
 
         if self.lock:
+            print("I had a lock")
             #dealloc lock
             PyThread_free_lock(self.lock)
             self.lock = NULL
+            print("lock is no more lock")
+
+        print("Deallocated myself")
 
     cdef _analyze(self):
         #phase = 11

From b8d82de0b6d547e9c0bc87da6517d3623c14ca9a Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 19:54:13 -0600
Subject: [PATCH 12/25] explicitly initialize numpy's c api

---
 pydiso/mkl_solver.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index ddacbdb..ca1b725 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -14,6 +14,8 @@ import numpy as np
 import scipy.sparse as sp
 import os
 
+np.import_array()
+
 cdef extern from 'mkl.h':
     ctypedef long long MKL_INT64
     ctypedef int MKL_INT

From 1c2e5f11e309adf5241d349937dc7ed067824aa8 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 19:55:04 -0600
Subject: [PATCH 13/25] fix

---
 pydiso/mkl_solver.pyx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index ca1b725..5f6e54f 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -1,5 +1,5 @@
 #cython: language_level=3
-cimport numpy as cnp
+cimport numpy as np
 import cython
 from cpython.pythread cimport (
     PyThread_type_lock,
@@ -14,6 +14,8 @@ import numpy as np
 import scipy.sparse as sp
 import os
 
+# We use np.PyArray_DATA to grab the pointer
+# to a numpy array.
 np.import_array()
 
 cdef extern from 'mkl.h':

From 17554b5ecaafb588085b61db3e942b050cae1b9f Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 19:57:58 -0600
Subject: [PATCH 14/25] fix more

---
 pydiso/mkl_solver.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index 5f6e54f..b15c352 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -418,8 +418,8 @@ cdef class MKLPardisoSolver:
             raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}")
         x = np.require(x, requirements='F')
 
-        cdef void * bp = cnp.PyArray_DATA(b)
-        cdef void * xp = cnp.PyArray_DATA(x)
+        cdef void * bp = np.PyArray_DATA(b)
+        cdef void * xp = np.PyArray_DATA(x)
 
         if bp == xp:
             raise PardisoError("b and x must be different arrays")
@@ -526,7 +526,7 @@ cdef class MKLPardisoSolver:
 
     cdef _set_A(self, data):
         self._Adata = data
-        self.a = cnp.PyArray_DATA(data)
+        self.a = np.PyArray_DATA(data)
 
     def __dealloc__(self):
         # Need to call pardiso with phase=-1 to release memory

From 68a5af3aeb9e14f5ea46efe244299b16e071fe6c Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 20:07:13 -0600
Subject: [PATCH 15/25] flush standard out

---
 tests/test_pydiso.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py
index addd733..5f65254 100644
--- a/tests/test_pydiso.py
+++ b/tests/test_pydiso.py
@@ -94,6 +94,7 @@ def test_solver(A, matrix_type):
     b = A@x
 
     solver = Solver(A, matrix_type=matrix_type)
+    sys.stdout.flush()
     x2 = solver.solve(b)
 
     eps = np.finfo(dtype).eps

From 579df3a4cda1b2c036225e815c5a05cda280ed0f Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 20:08:57 -0600
Subject: [PATCH 16/25] make more obvious when the test fails

---
 .github/workflows/python-package-conda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index ebac542..e0a2b5b 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -68,7 +68,7 @@ jobs:
       run: |
         ${{ matrix.coverage && 'coverage run -m' || '' }} pytest -s -v
         ${{ matrix.coverage && 'coverage xml' || '' }}
-      continue-on-error: ${{ matrix.os == 'windows-latest' && matrix.mkl-version == '2024'}}
+#      continue-on-error: ${{ matrix.os == 'windows-latest' && matrix.mkl-version == '2024'}}
 
     - name: Upload coverage
       if: ${{ matrix.coverage }}

From 5d47631c7555a26ce5318fea44271e2653b5812b Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 20:17:25 -0600
Subject: [PATCH 17/25] More!

---
 .github/workflows/python-package-conda.yml |  6 +++---
 pydiso/mkl_solver.pyx                      | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index e0a2b5b..d2fd583 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -39,8 +39,7 @@ jobs:
       uses: conda-incubator/setup-miniconda@v3
       with:
         python-version: ${{ matrix.python-version }}
-        mamba-version: '*'
-        channels: conda-forge, defaults
+        channels: defaults
         channel-priority: true
         activate-environment: dev
 
@@ -52,7 +51,8 @@ jobs:
 
     - name: Create environment
       run: |
-        conda install --quiet --yes pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \
+        conda install --quiet --yes -c conda-forge \
+          pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \
           mkl-devel pkg-config meson-python meson ninja setuptools_scm \
           ${{ matrix.coverage && 'coverage' || ''}}
 
diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index b15c352..e7a075f 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -13,6 +13,7 @@ import warnings
 import numpy as np
 import scipy.sparse as sp
 import os
+import sys
 
 # We use np.PyArray_DATA to grab the pointer
 # to a numpy array.
@@ -418,9 +419,13 @@ cdef class MKLPardisoSolver:
             raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}")
         x = np.require(x, requirements='F')
 
+        print("Validated input arrays")
+
         cdef void * bp = np.PyArray_DATA(b)
         cdef void * xp = np.PyArray_DATA(x)
 
+        print("assigned x and b pointers")
+
         if bp == xp:
             raise PardisoError("b and x must be different arrays")
 
@@ -430,6 +435,8 @@ cdef class MKLPardisoSolver:
             self.set_iparm(11, 2)
         else:
             self.set_iparm(11, 0)
+
+        print("updated iparm 11")
         self._solve(bp, xp, nrhs)
         return x
 
@@ -591,6 +598,8 @@ cdef class MKLPardisoSolver:
         if(not self._factored):
             raise PardisoError("Cannot solve without a previous factorization.")
 
+        print("trying to solve")
+
         with nogil:
             err = self._run_pardiso(33, b, x, nrhs_in)
 
@@ -604,9 +613,16 @@ cdef class MKLPardisoSolver:
 
         PyThread_acquire_lock(self.lock, 1)
         if self._is_32:
+            with gil:
+                print("Calling pardiso")
+                sys.stdout.flush()
+
             pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype,
                     &phase, &self._par.n, self.a, &self._par.ia[0], &self._par.ja[0],
                     &self._par.perm[0], &nrhs, self._par.iparm, &self._par.msglvl, b, x, &error)
+
+            with gil:
+                print("Called pardiso, error was ", error)
         else:
             pardiso_64(self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype,
                     &phase64, &self._par64.n, self.a, &self._par64.ia[0], &self._par64.ja[0],

From 9d870f266d326e1640a7f908b1b4c6eb91cb4790 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 21:23:44 -0600
Subject: [PATCH 18/25] Manually increment and decrement the x and b array's
 refcounts

---
 pydiso/mkl_solver.pyx | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index e7a075f..cbada90 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -1,6 +1,10 @@
 #cython: language_level=3
 cimport numpy as np
 import cython
+from cpython.ref cimport(
+    Py_INCREF,
+    Py_DECREF,
+)
 from cpython.pythread cimport (
     PyThread_type_lock,
     PyThread_allocate_lock,
@@ -421,7 +425,9 @@ cdef class MKLPardisoSolver:
 
         print("Validated input arrays")
 
+        Py_INCREF(b)
         cdef void * bp = np.PyArray_DATA(b)
+        Py_INCREF(x)
         cdef void * xp = np.PyArray_DATA(x)
 
         print("assigned x and b pointers")
@@ -437,7 +443,13 @@ cdef class MKLPardisoSolver:
             self.set_iparm(11, 0)
 
         print("updated iparm 11")
-        self._solve(bp, xp, nrhs)
+        try:
+            self._solve(bp, xp, nrhs)
+        except Exception as err:
+            raise err
+        finally:
+            Py_DECREF(b)
+            Py_DECREF(x)
         return x
 
     @property

From 1edad1b923a71a86d69dd86aaaa769a2de81538d Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 21:34:19 -0600
Subject: [PATCH 19/25] flush here too

---
 pydiso/mkl_solver.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index cbada90..01ddf2d 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -433,6 +433,8 @@ cdef class MKLPardisoSolver:
         print("assigned x and b pointers")
 
         if bp == xp:
+            Py_DECREF(b)
+            Py_DECREF(x)
             raise PardisoError("b and x must be different arrays")
 
         cdef int_t nrhs = b.shape[1] if b.ndim == 2 else 1
@@ -635,6 +637,7 @@ cdef class MKLPardisoSolver:
 
             with gil:
                 print("Called pardiso, error was ", error)
+                sys.stdout.flush()
         else:
             pardiso_64(self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype,
                     &phase64, &self._par64.n, self.a, &self._par64.ia[0], &self._par64.ja[0],

From 577203159c93f436c5206f92c50cf132e579f894 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 21:52:27 -0600
Subject: [PATCH 20/25] ensure all A matrix information is c-contiguous.

---
 pydiso/mkl_solver.pyx | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index 01ddf2d..1bfbe89 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -542,11 +542,11 @@ cdef class MKLPardisoSolver:
         par.iparm[55] = 0  # Internal function used to work with pivot and calculation of diagonal arrays turned off.
         par.iparm[59] = 0  # operate in-core mode
 
-        par.ia = np.require(A.indptr, dtype=int_dtype)
-        par.ja = np.require(A.indices, dtype=int_dtype)
+        par.ia = np.require(A.indptr, dtype=int_dtype, requirements='C')
+        par.ja = np.require(A.indices, dtype=int_dtype, requirements='C')
 
     cdef _set_A(self, data):
-        self._Adata = data
+        self._Adata = np.require(data, requirements='C')
         self.a = np.PyArray_DATA(data)
 
     def __dealloc__(self):
@@ -629,6 +629,17 @@ cdef class MKLPardisoSolver:
         if self._is_32:
             with gil:
                 print("Calling pardiso")
+
+                print("maxfct", self._par.maxfct)
+                print("mnum", self._par.mnum)
+                print("mtype", self._par.mtype)
+                print("phase", phase)
+                print("n", self._par.n)
+                print("nrhs" nrhs)
+                print("msglvl", self._par.msglvl)
+                print("ia")
+                for i in range(self._par.n):
+                    print()
                 sys.stdout.flush()
 
             pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype,

From 00871be032e9d76ca739bffd1996d98841f0fe09 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Fri, 13 Sep 2024 22:08:54 -0600
Subject: [PATCH 21/25] information

---
 pydiso/mkl_solver.pyx | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
index 1bfbe89..971ab52 100644
--- a/pydiso/mkl_solver.pyx
+++ b/pydiso/mkl_solver.pyx
@@ -635,11 +635,29 @@ cdef class MKLPardisoSolver:
                 print("mtype", self._par.mtype)
                 print("phase", phase)
                 print("n", self._par.n)
-                print("nrhs" nrhs)
+                print("nrhs", nrhs)
                 print("msglvl", self._par.msglvl)
                 print("ia")
-                for i in range(self._par.n):
-                    print()
+                n = self._par.n
+                for i in range(n+1):
+                    print(self._par.ia[i])
+                nnz = self._par.ia[n]
+                print("ja")
+                for i in range(nnz):
+                    print(self._par.ja[i])
+
+                print("a")
+                for i in range(nnz):
+                    print(self._Adata[i])
+                if x:
+                    print("x")
+                    for i in range(n):
+                        x[i]
+                if b:
+                    print("b")
+                    for i in range(n):
+                        b[i]
+
                 sys.stdout.flush()
 
             pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype,

From cd7ce76a5ababaadb3970c9df0e32b6d44011b96 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Wed, 18 Sep 2024 23:56:49 -0600
Subject: [PATCH 22/25] re-configure

use template for integer type

finalize

add space

don't worry about math_defines (not using anything special from it anyways)

fix extra line continuation character

fix meson build

does tbb work?

set seq as default

use mkl for blas on windows
---
 .github/workflows/python-package-conda.yml |   6 +-
 pydiso/_mkl_solver.pyx.in                  | 299 +++++++++
 pydiso/meson.build                         |  47 +-
 pydiso/mkl_solver.py                       | 352 +++++++++++
 pydiso/mkl_solver.pyx                      | 676 ---------------------
 pyproject.toml                             |   2 +-
 tests/test_pydiso.py                       |  21 +-
 7 files changed, 685 insertions(+), 718 deletions(-)
 create mode 100644 pydiso/_mkl_solver.pyx.in
 create mode 100644 pydiso/mkl_solver.py
 delete mode 100644 pydiso/mkl_solver.pyx

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index d2fd583..bfe46df 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -54,14 +54,16 @@ jobs:
         conda install --quiet --yes -c conda-forge \
           pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \
           mkl-devel pkg-config meson-python meson ninja setuptools_scm \
-          ${{ matrix.coverage && 'coverage' || ''}}
+          ${{ matrix.coverage && 'coverage' || ''}} \
+          ${{ matrix.os == 'windows-latest' && '"libblas=*=*mkl"' || ''}}
 
     - name: Install Our Package
       run: |
         python -m pip install --no-build-isolation --verbose --editable . \
           --config-setting=compile-args=-v \
           ${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}} \
-          ${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="--vsenv"' || ''}}
+          ${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="-Dvsenv=true"' || ''}}
+
         conda list
 
     - name: Run Tests
diff --git a/pydiso/_mkl_solver.pyx.in b/pydiso/_mkl_solver.pyx.in
new file mode 100644
index 0000000..e7449b2
--- /dev/null
+++ b/pydiso/_mkl_solver.pyx.in
@@ -0,0 +1,299 @@
+#cython: language_level=3
+cimport numpy as np
+import cython
+from cpython.pythread cimport (
+    PyThread_type_lock,
+    PyThread_allocate_lock,
+    PyThread_acquire_lock,
+    PyThread_release_lock,
+    PyThread_free_lock
+)
+
+import numpy as np
+import os
+
+# We use np.PyArray_DATA to grab the pointer
+# to a numpy array.
+np.import_array()
+
+cdef extern from 'mkl.h':
+    ctypedef long long MKL_INT64
+    ctypedef int MKL_INT
+
+ctypedef MKL_INT int_t
+ctypedef MKL_INT64 long_t
+
+cdef extern from 'mkl.h':
+    int MKL_DOMAIN_PARDISO
+
+    ctypedef struct MKLVersion:
+        int MajorVersion
+        int MinorVersion
+        int UpdateVersion
+        char * ProductStatus
+        char * Build
+        char * Processor
+        char * Platform
+
+    void mkl_get_version(MKLVersion* pv)
+
+    void mkl_set_num_threads(int nth)
+    int mkl_domain_set_num_threads(int nt, int domain)
+    int mkl_get_max_threads()
+    int mkl_domain_get_max_threads(int domain)
+
+    ctypedef int (*ProgressEntry)(int* thread, int* step, char* stage, int stage_len) except? -1;
+    ProgressEntry mkl_set_progress(ProgressEntry progress);
+
+    ctypedef void * _MKL_DSS_HANDLE_t
+
+    void pardiso(_MKL_DSS_HANDLE_t, const int_t*, const int_t*, const int_t*,
+                 const int_t *, const int_t *, const void *, const int_t *,
+                 const int_t *, int_t *, const int_t *, int_t *,
+                 const int_t *, void *, void *, int_t *) nogil
+
+    void pardiso_64(_MKL_DSS_HANDLE_t, const long_t *, const long_t *, const long_t *,
+                    const long_t *, const long_t *, const void *, const long_t *,
+                    const long_t *, long_t *, const long_t *, long_t *,
+                    const long_t *, void *, void *, long_t *) nogil
+
+
+_err_messages = {0:"no error",
+                -1:'input inconsistent',
+                -2:'not enough memory',
+                -3:'reordering problem',
+                -4:'zero pivot, numerical factorization or iterative refinement problem',
+                -5:'unclassified (internal) error',
+                -6:'reordering failed',
+                -7:'diagonal matrix is singular',
+                -8:'32-bit integer overflow problem',
+                -9:'not enough memory for OOC',
+                -10:'error opening OOC files',
+                -11:'read/write error with OOC files',
+                -12:'pardiso_64 called from 32-bit library',
+                }
+
+class PardisoError(Exception):
+    pass
+
+class PardisoWarning(UserWarning):
+    pass
+
+
+#call pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, perm, nrhs, iparm, msglvl, b, x, error)
+cdef int mkl_progress(int *thread, int* step, char* stage, int stage_len) nogil:
+    # must be a nogil process to pass to mkl pardiso progress reporting
+    with gil:
+        # must reacquire the gil to print out back to python.
+        print(thread[0], step[0], stage, stage_len)
+    return 0
+
+cdef int mkl_no_progress(int *thread, int* step, char* stage, int stage_len) nogil:
+    return 0
+
+
+def get_mkl_max_threads():
+    """
+    Returns the current number of openMP threads available to the MKL Library
+    """
+    return mkl_get_max_threads()
+
+def get_mkl_pardiso_max_threads():
+    """
+    Returns the current number of openMP threads available to the Pardiso functions
+    """
+    return mkl_domain_get_max_threads(MKL_DOMAIN_PARDISO)
+
+def set_mkl_threads(num_threads=None):
+    """
+    Sets the number of openMP threads available to the MKL library.
+
+    Parameters
+    ----------
+    num_threads : None or int
+        number of threads to use for the MKL library.
+        None will set the number of threads to that returned by `os.cpu_count()`.
+    """
+    if num_threads is None:
+        num_threads = os.cpu_count()
+    elif num_threads<=0:
+        raise ValueError('Number of threads must be greater than 0')
+    mkl_set_num_threads(num_threads)
+
+def set_mkl_pardiso_threads(num_threads=None):
+    """
+    Sets the number of openMP threads available to the Pardiso functions
+
+    Parameters
+    ----------
+    num_threads : None or int
+        Number of threads to use for the MKL Pardiso routines.
+        None (or 0) will set the number of threads to `get_mkl_max_threads`
+    """
+    if num_threads is None:
+        num_threads = 0
+    elif num_threads<0:
+        raise ValueError('Number of threads must be greater than 0')
+    mkl_domain_set_num_threads(num_threads, MKL_DOMAIN_PARDISO)
+
+def get_mkl_version():
+    """
+    Returns a dictionary describing the version of Intel Math Kernel Library used
+    """
+    cdef MKLVersion vers
+    mkl_get_version(&vers)
+    return vers
+
+def get_mkl_int_size():
+    """Return the size of the MKL_INT at compile time in bytes.
+
+    Returns
+    -------
+    int
+    """
+    return sizeof(MKL_INT)
+
+
+def get_mkl_int64_size():
+    """Return the size of the MKL_INT64 at compile time in bytes.
+
+    Returns
+    -------
+    int
+    """
+    return sizeof(MKL_INT64)
+
+
+
+ctypedef fused real_or_complex:
+    np.float32_t
+    np.float64_t
+    np.complex64_t
+    np.complex128_t
+
+
+{{for int_type in ["int_t", "long_t"]}}
+cdef class _PardisoHandle_{{int_type}}:
+    cdef _MKL_DSS_HANDLE_t handle[64]
+    cdef PyThread_type_lock lock
+
+    cdef {{int_type}} n, maxfct, mnum, msglvl
+    cdef public {{int_type}} matrix_type
+    cdef public {{int_type}}[64] iparm
+    cdef public {{int_type}}[:] perm
+
+    @cython.boundscheck(False)
+    def __cinit__(self, A_dat_dtype, n, matrix_type, maxfct, mnum, msglvl):
+        self.lock = PyThread_allocate_lock()
+
+        np_int_dtype = np.dtype(f"i{sizeof({{int_type}})}")
+
+        for i in range(64):
+            self.handle[i] = NULL
+
+        self.n = n
+        self.matrix_type = matrix_type
+        self.maxfct = maxfct
+        self.mnum = mnum
+        self.msglvl = msglvl
+
+        if self.msglvl:
+            #for reporting factorization progress via python's `print`
+            mkl_set_progress(mkl_progress)
+        else:
+            mkl_set_progress(mkl_no_progress)
+
+        is_single_precision = np.issubdtype(A_dat_dtype, np.single) or np.issubdtype(A_dat_dtype, np.csingle)
+
+        self.perm = np.empty(self.n, dtype=np_int_dtype)
+
+        for i in range(64):
+            self.iparm[i] = 0  # ensure these all start at 0
+
+        # set default parameters
+        self.iparm[0] = 1  # tell pardiso to not reset these values on the first call
+        self.iparm[1] = 2  # The nested dissection algorithm from the METIS
+        self.iparm[3] = 0  # The factorization is always computed as required by phase.
+        self.iparm[4] = 2  # fill perm with computed permutation vector
+        self.iparm[5] = 0  # The array x contains the solution; right-hand side vector b is kept unchanged.
+        self.iparm[7] = 0  # The solver automatically performs two steps of iterative refinement when perterbed pivots are obtained
+        self.iparm[9] = 13 if matrix_type in [11, 13] else 8
+        self.iparm[10] = 1 if matrix_type in [11, 13] else 0
+        self.iparm[11] = 0  # Solve a linear system AX = B (as opposed to A.T or A.H)
+        self.iparm[12] = 1 if matrix_type in [11, 13] else 0
+        self.iparm[17] = -1  # Return the number of non-zeros in this value after first call
+        self.iparm[18] = 0  # do not report flop count
+        self.iparm[20] = 1 if matrix_type in [-2, -4, 6] else 0
+        self.iparm[23] = 0  # classic (not parallel) factorization
+        self.iparm[24] = 0  # default behavoir of parallel solving
+        self.iparm[26] = 1  # Do not check the input matrix
+        self.iparm[27] = is_single_precision  # 1 if single, 0 if double
+        self.iparm[30] = 0  # this would be used to enable sparse input/output for solves
+        self.iparm[33] = 0  # optimal number of thread for CNR mode
+        self.iparm[34] = 1  # zero based indexing
+        self.iparm[35] = 0  # Do not compute schur complement
+        self.iparm[36] = 0  # use CSR storage format
+        self.iparm[38] = 0  # Do not use low rank update
+        self.iparm[42] = 0  # Do not compute the diagonal of the inverse
+        self.iparm[55] = 0  # Internal function used to work with pivot and calculation of diagonal arrays turned off.
+        self.iparm[59] = 0  # operate in-core mode
+
+    def initialized(self):
+        return self._initialized()
+
+    cdef int _initialized(self) noexcept nogil:
+        # If any of the handle pointers are not null, return 1
+        cdef int i
+        for i in range(64):
+            if self.handle[i]:
+                return 1
+        return 0
+
+    def set_iparm(self, {{int_type}} i, {{int_type}} val):
+        self.iparm[i] = val
+
+    @cython.boundscheck(False)
+    cpdef {{int_type}} call_pardiso(self,
+            {{int_type}} phase,
+            real_or_complex[::1] a_data,
+            {{int_type}}[::1] a_indptr,
+            {{int_type}}[::1] a_indices,
+            real_or_complex[::1, :] rhs,
+            real_or_complex[::1, :] out
+    ):
+        cdef {{int_type}} error, nrhs
+        with nogil:
+            nrhs = rhs.shape[1]
+            PyThread_acquire_lock(self.lock, mode=1)
+            pardiso{{if int_type == "long_t"}}_64{{endif}}(
+                    self.handle, &self.maxfct, &self.mnum, &self.matrix_type, &phase, &self.n,
+                    &a_data[0], &a_indptr[0], &a_indices[0], &self.perm[0],
+                    &nrhs, self.iparm, &self.msglvl,
+                    &rhs[0, 0], &out[0, 0], &error
+                )
+            PyThread_release_lock(self.lock)
+        return error
+
+    @cython.boundscheck(False)
+    def __dealloc__(self):
+        # Need to call pardiso with phase=-1 to release memory (if it was initialized)
+        cdef {{int_type}} phase = -1, nrhs = 0, error = 0
+
+        with nogil:
+            PyThread_acquire_lock(self.lock, mode=1)
+            if self._initialized():
+                pardiso{{if int_type == "long_t"}}_64{{endif}}(
+                            self.handle, &self.maxfct, &self.mnum, &self.matrix_type,
+                            &phase, &self.n, NULL, NULL, NULL, NULL, &nrhs, self.iparm,
+                            &self.msglvl, NULL, NULL, &error)
+                if error == 0:
+                    for i in range(64):
+                        self.handle[i] = NULL
+            PyThread_release_lock(self.lock)
+        if error != 0:
+            raise MemoryError("Pardiso Memory release error: " + _err_messages[error])
+        if self.lock:
+            #deallocate the lock
+            PyThread_free_lock(self.lock)
+            self.lock = NULL
+{{endfor}}
\ No newline at end of file
diff --git a/pydiso/meson.build b/pydiso/meson.build
index 619a5dc..5bdd8a9 100644
--- a/pydiso/meson.build
+++ b/pydiso/meson.build
@@ -1,3 +1,19 @@
+cython_file = custom_target(
+    input: '_mkl_solver.pyx.in',
+    output: '_mkl_solver.pyx',
+    command: [py,
+  '-c',
+'''
+import sys
+from pathlib import Path
+from Cython.Tempita import sub
+
+template = Path(sys.argv[1]).read_text("utf8")
+output = sub(template)
+Path(sys.argv[2]).write_text(output, "utf8")
+''', '@INPUT@', '@OUTPUT@']
+)
+
 # NumPy include directory
 np_dep = dependency('numpy')
 numpy_nodepr_api = ['-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION']
@@ -15,7 +31,6 @@ endif
 # MKL-specific options
 _threading_opt = get_option('mkl-threading')
 if _threading_opt == 'auto'
-  # openmp.pc not included with conda-forge distribution (yet)
   mkl_dep_name += '-seq'
 else
   mkl_dep_name += '-' + _threading_opt
@@ -45,37 +60,13 @@ else
 
 endif
 
-# Deal with M_PI & friends; add `use_math_defines` to c_args or cpp_args
-# Cython doesn't always get this right itself (see, e.g., gh-16800), so
-# explicitly add the define as a compiler flag for Cython-generated code.
-is_windows = host_machine.system() == 'windows'
-if is_windows
-  use_math_defines = ['-D_USE_MATH_DEFINES']
-else
-  use_math_defines = []
-endif
-
 c_undefined_ok = ['-Wno-maybe-uninitialized']
-cython_c_args = [numpy_nodepr_api, use_math_defines, '-DCYTHON_CCOMPLEX=0']
-
-cython_file = 'mkl_solver.pyx'
-
-if get_option('b_coverage')
-  # tell cython to enable linetracing
-  add_project_arguments(['--directive', 'linetrace=true'], language : 'cython')
-  # tell the c_compiler to definie the CYTHON_TRACE_NOGIL
-  add_project_arguments(['-DCYTHON_TRACE_NOGIL=1'], language : 'c')
-
-  # compile the .c file from the .pyx file in it's directory.
-  # These should include the default options passed to the cython compiler
-  cython_file_full_path = meson.current_source_dir() / cython_file
-  run_command(cython, '-M', '--fast-fail', '-3', '--directive', 'linetrace=true', cython_file_full_path)
-endif
+cython_c_args = [numpy_nodepr_api, '-DCYTHON_CCOMPLEX=0']
 
 module_path = 'pydiso'
 
 py.extension_module(
-    'mkl_solver',
+    '_mkl_solver',
     cython_file,
     c_args: cython_c_args,
     install: true,
@@ -84,7 +75,7 @@ py.extension_module(
 )
 
 python_sources = [
-  '__init__.py',
+  '__init__.py', 'mkl_solver.py'
 ]
 
 py.install_sources(
diff --git a/pydiso/mkl_solver.py b/pydiso/mkl_solver.py
new file mode 100644
index 0000000..a4cf6a7
--- /dev/null
+++ b/pydiso/mkl_solver.py
@@ -0,0 +1,352 @@
+import numpy as np
+import scipy.sparse as sp
+from ._mkl_solver import (
+    _PardisoHandle_int_t,
+    _PardisoHandle_long_t,
+    get_mkl_int_size,
+    get_mkl_int64_size,
+    get_mkl_max_threads,
+    get_mkl_pardiso_max_threads,
+    set_mkl_threads,
+    set_mkl_pardiso_threads,
+    get_mkl_version,
+    _err_messages,
+    PardisoWarning,
+    PardisoError,
+)
+import warnings
+
+
+MATRIX_TYPES ={
+    'real_structurally_symmetric': 1,
+    'real_symmetric_positive_definite': 2,
+    'real_symmetric_indefinite': -2,
+    'complex_structurally_symmetric': 3,
+    'complex_hermitian_positive_definite': 4,
+    'complex_hermitian_indefinite': -4,
+    'complex_symmetric': 6,
+    'real_nonsymmetric': 11,
+    'complex_nonsymmetric': 13}
+"""dict : matrix type string keys and corresponding integer value to describe
+the different supported matrix types.
+"""
+
+
+class PardisoTypeConversionWarning(
+        PardisoWarning, sp.SparseEfficiencyWarning):
+    pass
+
+
+def _ensure_csr(A, sym=False):
+    if not (sp.isspmatrix_csr(A)):
+        if sym and sp.isspmatrix_csc(A):
+            A = A.T
+        else:
+            warnings.warn("Converting %s matrix to CSR format."
+                         %A.__class__.__name__, PardisoTypeConversionWarning)
+            A = A.tocsr()
+    return A
+
+
+class MKLPardisoSolver:
+
+    def __init__(self, A, matrix_type=None, factor=True, verbose=False):
+        '''ParidsoSolver(A, matrix_type=None, factor=True, verbose=False)
+        An interface to the intel MKL pardiso sparse matrix solver.
+
+        This is a solver class for a scipy sparse matrix using the Pardiso sparse
+        solver in the Intel Math Kernel Library.
+
+        It will factorize the sparse matrix in three steps: a symbolic
+        factorization stage, a numerical factorization stage, and a solve stage.
+
+        The purpose is to construct a sparse factorization that can be repeatedly
+        called to solve for multiple right-hand sides.
+
+        Parameters
+        ----------
+        A : scipy.sparse.spmatrix
+            A sparse matrix preferably in a CSR format.
+        matrix_type : str, int, or None, optional
+            A string describing the matrix type, or it's corresponding int code.
+            If None, then assumed to be nonsymmetric matrix.
+        factor : bool, optional
+            Whether to perform the factorization stage upon instantiation of the class.
+        verbose : bool, optional
+            Enable verbose output from the pardiso solver.
+
+        Notes
+        -----
+
+        The supported matrix types are: real symmetric positive definite, real
+        symmetric indefinite, real structurally symmetric, real nonsymmetric,
+        complex hermitian positive definite, complex hermitian indefinite, complex
+        symmetric, complex structurally symmetric, and complex nonsymmetric.
+        The solver supports both single and double precision matrices.
+
+        Examples
+        --------
+
+        Solve a symmetric positive definite system by first forming a simple 5 point
+        laplacian stencil with a zero boundary condition. Then we create a known
+        solution vector to compare to result with.
+
+        >>> import scipy.sparse as sp
+        >>> from pydiso.mkl_solver import MKLPardisoSolver
+        >>> nx, ny = 5, 7
+        >>> Dx = sp.diags((-1, 1), (-1, 0), (nx+1, nx))
+        >>> Dy = sp.diags((-1, 1), (-1, 0), (ny+1, ny))
+        >>> A = sp.kron(sp.eye(nx), Dy.T @ Dy) + sp.kron(Dx.T @ Dx, sp.eye(ny))
+        >>> x = np.linspace(-10, 10, nx*ny)
+        >>> b = A @ x
+
+        Next we create the solver object using pardiso
+
+        >>> Ainv = MKLPardisoSolver(A, matrix_type='real_symmetric_positive_definite')
+        >>> x_solved = Ainv.solve(b)
+        >>> np.allclose(x, x_solved)
+        True
+        '''
+        if not sp.issparse(A):
+            raise TypeError(f"type(A)={type(A).__name__} must be a sparse array or sparse matrix.")
+
+        if A.ndim != 2:
+            raise ValueError(f"A.ndim={A.ndim} must be to 2.")
+
+        n_row, n_col = A.shape
+        if n_row != n_col:
+            raise ValueError(f"A with shape {A.shape} is not a square matrix.")
+        self.shape = n_row, n_col
+
+        data_dtype = A.dtype
+        if not(
+            np.issubdtype(data_dtype, np.single) or
+            np.issubdtype(data_dtype, np.double) or
+            np.issubdtype(data_dtype, np.csingle) or
+            np.issubdtype(data_dtype, np.cdouble)
+        ):
+            raise ValueError(
+                f"Unrecognized matrix data type, {data_dtype}. Must be single or double precision of real or complex values."
+            )
+        self._data_dtype = data_dtype
+
+        is_complex = np.issubdtype(data_dtype, np.complexfloating)
+
+        if matrix_type is None:
+            if is_complex:
+                matrix_type = MATRIX_TYPES['complex_nonsymmetric']
+            else:
+                matrix_type = MATRIX_TYPES['real_nonsymmetric']
+
+        if not(matrix_type in MATRIX_TYPES or matrix_type in MATRIX_TYPES.values()):
+            raise TypeError(f'Unrecognized matrix_type: {matrix_type}')
+        if matrix_type in MATRIX_TYPES:
+            matrix_type = MATRIX_TYPES[matrix_type]
+
+        if matrix_type in [1, 2, -2, 11]:
+            if is_complex:
+                raise ValueError(
+                    f"Complex matrix dtype and matrix_type={matrix_type} are inconsistent, expected a real matrix"
+                )
+        else:
+            if not is_complex:
+                raise ValueError(
+                    f"Real matrix dtype and matrix_type={matrix_type} are inconsistent, expected a complex matrix"
+                )
+
+        self.matrix_type = matrix_type
+
+        indptr = np.asarray(A.indptr)  # double check it's a numpy array
+        mkl_int_size= get_mkl_int_size()
+        mkl_int64_size = get_mkl_int64_size()
+
+        target_int_size = mkl_int_size if indptr.itemsize <= mkl_int_size else mkl_int64_size
+        self._ind_dtype = np.dtype(f"i{target_int_size}")
+
+        data, indptr, indices = self._validate_matrix(A)
+        self._data = data
+        self._indptr = indptr
+        self._indices = indices
+
+        if target_int_size == mkl_int_size:
+            HandleClass = _PardisoHandle_int_t
+        else:
+            HandleClass = _PardisoHandle_long_t
+        self._handle = HandleClass(self._data_dtype, self.shape[0], matrix_type, maxfct=1, mnum=1, msglvl=verbose)
+
+        self._analyze()
+        self._factored = False
+        if factor:
+            self._factor()
+
+    def refactor(self, A):
+        """solver.refactor(A)
+        re-use a symbolic factorization with a new `A` matrix.
+
+        Note
+        ----
+        Must have the same non-zero pattern as the initial `A` matrix.
+        If `full_refactor=False`, the initial factorization is used as a
+        preconditioner to a Krylov subspace solver in the solve step.
+
+        Parameters
+        ----------
+        A : scipy.sparse.spmatrix
+            A sparse matrix preferably in a CSR format.
+        """
+        #Assumes that the matrix A has the same non-zero pattern and ordering
+        #as the initial A matrix
+
+        if not sp.issparse(A):
+            raise TypeError("A is not a sparse matrix.")
+        if A.shape != self.shape:
+            raise ValueError("A is not the same size as the previous matrix.")
+        data, indptr, indices = self._validate_matrix(A)
+        if len(data) != len(self._data):
+            raise ValueError("new A matrix does not have the same number of non zeros.")
+
+        self._data = data
+        self._factor()
+
+    def __call__(self, b):
+        return self.solve(b)
+
+    def solve(self, b, x=None, transpose=False):
+        """solve(self, b, x=None, transpose=False)
+        Solves the equation AX=B using the factored A matrix
+
+        Note
+        ----
+        The data will be copied if not contiguous in all cases. If multiple rhs
+        are given, the input arrays will be copied if not in a contiguous
+        Fortran order.
+
+        Parameters
+        ----------
+        b : numpy.ndarray
+            array of shape 1D or 2D for the right hand side of the equation
+            (of the same data type as A).
+        x : numpy.ndarray, optional
+            A pre-allocated output array (of the same data type as A).
+            If None, a new array is constructed.
+        transpose : bool, optional
+            If True, it will solve A^TX=B using the factored A matrix.
+
+        Returns
+        -------
+        numpy array
+            array containing the solution (in Fortran ordering)
+        """
+        if(not self._factored):
+            raise PardisoError("Cannot solve without a previous factorization.")
+
+        if b.dtype != self._data_dtype:
+            warnings.warn("rhs does not have the same data type as A",
+                            PardisoTypeConversionWarning)
+            b = b.astype(self._data_dtype)
+        b = np.atleast_1d(b)
+        b_was_1d = b.ndim == 1
+        if b_was_1d:
+            b = b[:, None]
+        if b.ndim != 2:
+            raise ValueError(f"b.ndim={b.ndim} must be 1 or 2.")
+        if b.shape[0] != self.shape[0]:
+            raise ValueError(f"incorrect length of b, expected {self.shape[0]}, got {b.shape[0]}")
+        b = np.require(b, requirements='F')
+
+        if x is None:
+            x = np.empty_like(b)
+            x_was_1d = b_was_1d
+        else:
+            if(x.dtype!=self._data_dtype):
+                warnings.warn("output does not have the same data type as A",
+                                PardisoTypeConversionWarning)
+                x = x.astype(self._data_dtype)
+            x = np.atleast_1d(x)
+            x_was_1d = x.ndim == 1
+            if x_was_1d:
+                x = x[:, None]
+            if x.ndim != 2:
+                raise ValueError(f"x.ndim={x.ndim} must be 1 or 2.")
+            if x.shape[0] != self.shape[0]:
+                raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}")
+            x = np.require(x, requirements='F')
+
+        if b.shape[1] != x.shape[1]:
+            raise ValueError(
+                f"Inconsistent shapes of right hand side, {b.shape} and output vector, {x.shape}")
+
+        if x is b or (x.base is not None and (x.base is b.base)):
+            raise ValueError("x and b cannot point to the same memory")
+
+        self._handle.set_iparm(11, 2 if transpose else 0)
+
+        phase = 33
+        error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, b, x)
+        if error:
+            raise PardisoError("Solve step error, "+_err_messages[error])
+        if x_was_1d:
+            x = x[:, 0]
+        return x
+
+    @property
+    def perm(self):
+        """ Fill-reducing permutation vector used inside pardiso.
+        """
+        return np.array(self._handle.perm)
+
+    @property
+    def iparm(self):
+        """ Parameter options for the pardiso solver.
+        """
+        return np.array(self._handle.iparm)
+
+    def _validate_matrix(self, mat):
+
+        if self.matrix_type in [-2, 2, -4, 4, 6]:
+            #  Symmetric matrices must have only the upper triangle
+            if sp.isspmatrix_csc(mat):
+                mat = mat.T  # Transpose to get a CSR matrix since it's symmetric
+            mat = sp.triu(mat, format='csr')
+        mat = _ensure_csr(mat)
+        mat.sort_indices()
+        mat.sum_duplicates()
+
+        data = np.require(mat.data, self._data_dtype, requirements="C")
+        indptr = np.require(mat.indptr, self._ind_dtype, requirements="C")
+        indices = np.require(mat.indices, self._ind_dtype, requirements="C")
+        return data, indptr, indices
+
+
+    def set_iparm(self, i, val):
+        if i > 63 or i < 0:
+            raise IndexError(f"index {i} is out of bounds for size 64 array")
+        if i not in [
+            1, 3, 4, 5, 7, 9, 10, 11, 12, 17, 18, 20, 23,
+            24, 26, 30, 33, 34, 35, 36, 38, 42, 55, 59
+        ]:
+            raise ValueError(f"cannot set parameter {i} of the iparm array")
+
+        self._handle.set_iparm(i, val)
+
+    @property
+    def nnz(self):
+        return self._handle.iparm[17]
+
+    def _analyze(self):
+        phase = 11
+        xb_dummy = np.empty([1,1], dtype=self._data_dtype)
+        error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy)
+        if error:
+            raise PardisoError("Analysis step error, "+_err_messages[error])
+
+    def _factor(self):
+        phase = 22
+        self._factored = False
+        xb_dummy = np.empty([1,1], dtype=self._data_dtype)
+        error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy)
+
+        if error:
+            raise PardisoError("Factor step error, "+_err_messages[error])
+
+        self._factored = True
\ No newline at end of file
diff --git a/pydiso/mkl_solver.pyx b/pydiso/mkl_solver.pyx
deleted file mode 100644
index 971ab52..0000000
--- a/pydiso/mkl_solver.pyx
+++ /dev/null
@@ -1,676 +0,0 @@
-#cython: language_level=3
-cimport numpy as np
-import cython
-from cpython.ref cimport(
-    Py_INCREF,
-    Py_DECREF,
-)
-from cpython.pythread cimport (
-    PyThread_type_lock,
-    PyThread_allocate_lock,
-    PyThread_acquire_lock,
-    PyThread_release_lock,
-    PyThread_free_lock
-)
-
-import warnings
-import numpy as np
-import scipy.sparse as sp
-import os
-import sys
-
-# We use np.PyArray_DATA to grab the pointer
-# to a numpy array.
-np.import_array()
-
-cdef extern from 'mkl.h':
-    ctypedef long long MKL_INT64
-    ctypedef int MKL_INT
-
-ctypedef MKL_INT int_t
-ctypedef MKL_INT64 long_t
-
-cdef extern from 'mkl.h':
-    int MKL_DOMAIN_PARDISO
-
-    ctypedef struct MKLVersion:
-        int MajorVersion
-        int MinorVersion
-        int UpdateVersion
-        char * ProductStatus
-        char * Build
-        char * Processor
-        char * Platform
-
-    void mkl_get_version(MKLVersion* pv)
-
-    void mkl_set_num_threads(int nth)
-    int mkl_domain_set_num_threads(int nt, int domain)
-    int mkl_get_max_threads()
-    int mkl_domain_get_max_threads(int domain)
-
-    ctypedef int (*ProgressEntry)(int* thread, int* step, char* stage, int stage_len) except? -1;
-    ProgressEntry mkl_set_progress(ProgressEntry progress);
-
-    ctypedef void * _MKL_DSS_HANDLE_t
-
-    void pardiso(_MKL_DSS_HANDLE_t, const int_t*, const int_t*, const int_t*,
-                 const int_t *, const int_t *, const void *, const int_t *,
-                 const int_t *, int_t *, const int_t *, int_t *,
-                 const int_t *, void *, void *, int_t *) nogil
-
-    void pardiso_64(_MKL_DSS_HANDLE_t, const long_t *, const long_t *, const long_t *,
-                    const long_t *, const long_t *, const void *, const long_t *,
-                    const long_t *, long_t *, const long_t *, long_t *,
-                    const long_t *, void *, void *, long_t *) nogil
-
-
-#call pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, perm, nrhs, iparm, msglvl, b, x, error)
-cdef int mkl_progress(int *thread, int* step, char* stage, int stage_len) nogil:
-    # must be a nogil process to pass to mkl pardiso progress reporting
-    with gil:
-        # must reacquire the gil to print out back to python.
-        print(thread[0], step[0], stage, stage_len)
-    return 0
-
-cdef int mkl_no_progress(int *thread, int* step, char* stage, int stage_len) nogil:
-    return 0
-
-MATRIX_TYPES ={
-    'real_structurally_symmetric': 1,
-    'real_symmetric_positive_definite': 2,
-    'real_symmetric_indefinite': -2,
-    'complex_structurally_symmetric': 3,
-    'complex_hermitian_positive_definite': 4,
-    'complex_hermitian_indefinite': -4,
-    'complex_symmetric': 6,
-    'real_nonsymmetric': 11,
-    'complex_nonsymmetric': 13}
-"""dict : matrix type string keys and corresponding integer value to describe
-the different supported matrix types.
-"""
-
-_err_messages = {0:"no error",
-                -1:'input inconsistent',
-                -2:'not enough memory',
-                -3:'reordering problem',
-                -4:'zero pivot, numerical factorization or iterative refinement problem',
-                -5:'unclassified (internal) error',
-                -6:'reordering failed',
-                -7:'diagonal matrix is singular',
-                -8:'32-bit integer overflow problem',
-                -9:'not enough memory for OOC',
-                -10:'error opening OOC files',
-                -11:'read/write error with OOC files',
-                -12:'pardiso_64 called from 32-bit library',
-                }
-
-class PardisoError(Exception):
-    pass
-
-class PardisoWarning(UserWarning):
-    pass
-
-class PardisoTypeConversionWarning(
-        PardisoWarning, sp.SparseEfficiencyWarning):
-    pass
-
-def _ensure_csr(A, sym=False):
-    if not sp.issparse(A):
-        raise(PardisoError("Matrix is not sparse."))
-    if not (sp.isspmatrix_csr(A)):
-        if sym and sp.isspmatrix_csc(A):
-            A = A.T
-        else:
-            warnings.warn("Converting %s matrix to CSR format."
-                         %A.__class__.__name__, PardisoTypeConversionWarning)
-            A = A.tocsr()
-    return A
-
-def get_mkl_max_threads():
-    """
-    Returns the current number of openMP threads available to the MKL Library
-    """
-    return mkl_get_max_threads()
-
-def get_mkl_pardiso_max_threads():
-    """
-    Returns the current number of openMP threads available to the Pardiso functions
-    """
-    return mkl_domain_get_max_threads(MKL_DOMAIN_PARDISO)
-
-def set_mkl_threads(num_threads=None):
-    """
-    Sets the number of openMP threads available to the MKL library.
-
-    Parameters
-    ----------
-    num_threads : None or int
-        number of threads to use for the MKL library.
-        None will set the number of threads to that returned by `os.cpu_count()`.
-    """
-    if num_threads is None:
-        num_threads = os.cpu_count()
-    elif num_threads<=0:
-        raise PardisoError('Number of threads must be greater than 0')
-    mkl_set_num_threads(num_threads)
-
-def set_mkl_pardiso_threads(num_threads=None):
-    """
-    Sets the number of openMP threads available to the Pardiso functions
-
-    Parameters
-    ----------
-    num_threads : None or int
-        Number of threads to use for the MKL Pardiso routines.
-        None (or 0) will set the number of threads to `get_mkl_max_threads`
-    """
-    if num_threads is None:
-        num_threads = 0
-    elif num_threads<0:
-        raise PardisoError('Number of threads must be greater than 0')
-    mkl_domain_set_num_threads(num_threads, MKL_DOMAIN_PARDISO)
-
-def get_mkl_version():
-    """
-    Returns a dictionary describing the version of Intel Math Kernel Library used
-    """
-    cdef MKLVersion vers
-    mkl_get_version(&vers)
-    return vers
-
-cdef class _PardisoParams:
-    cdef int_t iparm[64]
-    cdef int_t n, mtype, maxfct, mnum, msglvl
-    cdef int_t[:] ia, ja, perm
-
-cdef class _PardisoParams64:
-    cdef long_t iparm[64]
-    cdef long_t n, mtype, maxfct, mnum, msglvl
-    cdef long_t[:] ia, ja, perm
-
-ctypedef fused _par_params:
-    _PardisoParams
-    _PardisoParams64
-
-cdef class MKLPardisoSolver:
-    cdef _MKL_DSS_HANDLE_t handle[64]
-    cdef _PardisoParams _par
-    cdef _PardisoParams64 _par64
-    cdef int_t _is_32
-    cdef int_t mat_type
-    cdef int_t _factored
-    cdef size_t shape[2]
-    cdef PyThread_type_lock lock
-    cdef void * a
-
-    cdef object _data_type
-    cdef object _Adata # a reference to make sure the pointer "a" doesn't get destroyed
-
-    def __cinit__(self, *args, **kwargs):
-        self.lock = PyThread_allocate_lock()
-
-        for i in range(64):
-            self.handle[i] = NULL
-
-    def __init__(self, A, matrix_type=None, factor=True, verbose=False):
-        '''ParidsoSolver(A, matrix_type=None, factor=True, verbose=False)
-        An interface to the intel MKL pardiso sparse matrix solver.
-
-        This is a solver class for a scipy sparse matrix using the Pardiso sparse
-        solver in the Intel Math Kernel Library.
-
-        It will factorize the sparse matrix in three steps: a symbolic
-        factorization stage, a numerical factorization stage, and a solve stage.
-
-        The purpose is to construct a sparse factorization that can be repeatedly
-        called to solve for multiple right-hand sides.
-
-        Parameters
-        ----------
-        A : scipy.sparse.spmatrix
-            A sparse matrix preferably in a CSR format.
-        matrix_type : str, int, or None, optional
-            A string describing the matrix type, or it's corresponding int code.
-            If None, then assumed to be nonsymmetric matrix.
-        factor : bool, optional
-            Whether to perform the factorization stage upon instantiation of the class.
-        verbose : bool, optional
-            Enable verbose output from the pardiso solver.
-
-        Notes
-        -----
-
-        The supported matrix types are: real symmetric positive definite, real
-        symmetric indefinite, real structurally symmetric, real nonsymmetric,
-        complex hermitian positive definite, complex hermitian indefinite, complex
-        symmetric, complex structurally symmetric, and complex nonsymmetric.
-        The solver supports both single and double precision matrices.
-
-        Examples
-        --------
-
-        Solve a symmetric positive definite system by first forming a simple 5 point
-        laplacian stencil with a zero boundary condition. Then we create a known
-        solution vector to compare to result with.
-
-        >>> import scipy.sparse as sp
-        >>> from pydiso.mkl_solver import MKLPardisoSolver
-        >>> nx, ny = 5, 7
-        >>> Dx = sp.diags((-1, 1), (-1, 0), (nx+1, nx))
-        >>> Dy = sp.diags((-1, 1), (-1, 0), (ny+1, ny))
-        >>> A = sp.kron(sp.eye(nx), Dy.T @ Dy) + sp.kron(Dx.T @ Dx, sp.eye(ny))
-        >>> x = np.linspace(-10, 10, nx*ny)
-        >>> b = A @ x
-
-        Next we create the solver object using pardiso
-
-        >>> Ainv = MKLPardisoSolver(A, matrix_type='real_symmetric_positive_definite')
-        >>> x_solved = Ainv.solve(b)
-        >>> np.allclose(x, x_solved)
-        True
-        '''
-        n_row, n_col = A.shape
-        if n_row != n_col:
-            raise ValueError("Matrix is not square")
-        self.shape = n_row, n_col
-
-        self._data_type = A.dtype
-        if matrix_type is None:
-            if np.issubdtype(self._data_type, np.complexfloating):
-                matrix_type = MATRIX_TYPES['complex_nonsymmetric']
-            elif np.issubdtype(self._data_type, np.floating):
-                matrix_type = MATRIX_TYPES['real_nonsymmetric']
-            else:
-                raise(PardisoError('Unrecognized matrix data type'))
-        if not(matrix_type in MATRIX_TYPES or matrix_type in MATRIX_TYPES.values()):
-            raise PardisoError('Unrecognized matrix type')
-        if matrix_type in MATRIX_TYPES:
-            matrix_type = MATRIX_TYPES[matrix_type]
-        self.mat_type = matrix_type
-
-        if self.mat_type in [1, 2, -2, 11]:
-            if not np.issubdtype(self._data_type, np.floating):
-                raise TypeError(
-                    "matrix dtype and matrix_type not consistent, expected a real matrix"
-                )
-        else:
-            if not np.issubdtype(self._data_type, np.complexfloating):
-                raise TypeError(
-                    "matrix dtype and matrix_type not consistent, expected a complex matrix"
-                )
-
-        if self.mat_type in [-2, 2, -4, 4, 6]:
-            A = sp.triu(A, format='csr')
-        A = _ensure_csr(A)
-        A.sort_indices()
-
-        #set integer length
-        integer_len = A.indices.itemsize
-        #self._is_32 = integer_len == sizeof(int_t)
-        self._is_32 = sizeof(int_t) == 8 or integer_len == sizeof(int_t)
-
-        print("Am I calling _PardisoParams?")
-        print(self._is_32)
-
-        print("What is the matrix's indices itemsize?")
-        print(integer_len)
-
-        print("What is the matrix's indptr itemsize?")
-        print(A.indptr.itemsize)
-
-        print("What is the size of MKL_INT")
-        print(sizeof(int_t))
-
-        if self._is_32:
-            self._par = _PardisoParams()
-            self._initialize(self._par, A, matrix_type, verbose)
-        else:
-            self._par64 = _PardisoParams64()
-            self._initialize(self._par64, A, matrix_type, verbose)
-
-        if verbose:
-            #for reporting factorization progress via python's `print`
-            mkl_set_progress(mkl_progress)
-        else:
-            mkl_set_progress(mkl_no_progress)
-
-        self._set_A(A.data)
-        self._analyze()
-        self._factored = False
-        if factor:
-            self._factor()
-
-    def refactor(self, A):
-        """solver.refactor(A)
-        re-use a symbolic factorization with a new `A` matrix.
-
-        Note
-        ----
-        Must have the same non-zero pattern as the initial `A` matrix.
-        If `full_refactor=False`, the initial factorization is used as a
-        preconditioner to a Krylov subspace solver in the solve step.
-
-        Parameters
-        ----------
-        A : scipy.sparse.spmatrix
-            A sparse matrix preferably in a CSR format.
-        """
-        #Assumes that the matrix A has the same non-zero pattern and ordering
-        #as the initial A matrix
-        if self.mat_type in [-2, 2, -4, 4, 6]:
-            A = sp.triu(A, format='csr')
-        A = _ensure_csr(A)
-        A.sort_indices()
-
-        self._set_A(A.data)
-        self._factor()
-
-    cdef _initialized(self):
-        # If any of the handle pointers are not null, return 1
-        cdef int i
-        for i in range(64):
-            if self.handle[i]:
-                return 1
-        return 0
-
-    def __call__(self, b):
-        return self.solve(b)
-
-    def solve(self, b, x=None, transpose=False):
-        """solve(self, b, x=None, transpose=False)
-        Solves the equation AX=B using the factored A matrix
-
-        Note
-        ----
-        The data will be copied if not contiguous in all cases. If multiple rhs
-        are given, the input arrays will be copied if not in a contiguous
-        Fortran order.
-
-        Parameters
-        ----------
-        b : numpy.ndarray
-            array of shape 1D or 2D for the right hand side of the equation
-            (of the same data type as A).
-        x : numpy.ndarray, optional
-            A pre-allocated output array (of the same data type as A).
-            If None, a new array is constructed.
-        transpose : bool, optional
-            If True, it will solve A^TX=B using the factored A matrix.
-
-        Returns
-        -------
-        numpy array
-            array containing the solution (in Fortran ordering)
-        """
-        if b.dtype != self._data_type:
-            warnings.warn("rhs does not have the same data type as A",
-                            PardisoTypeConversionWarning)
-            b = b.astype(self._data_type)
-        b = np.atleast_1d(b)
-        if b.shape[0] != self.shape[0]:
-            raise ValueError(f"incorrect length of b, expected {self.shape[0]}, got {b.shape[0]}")
-        b = np.require(b, requirements='F')
-
-        if x is None:
-            x = np.empty_like(b)
-        if(x.dtype!=self._data_type):
-            warnings.warn("output does not have the same data type as A",
-                            PardisoTypeConversionWarning)
-            x = x.astype(self._data_type)
-        x = np.atleast_1d(x)
-        if x.shape[0] != self.shape[0]:
-            raise ValueError(f"incorrect length of x, expected {self.shape[0]}, got {x.shape[0]}")
-        x = np.require(x, requirements='F')
-
-        print("Validated input arrays")
-
-        Py_INCREF(b)
-        cdef void * bp = np.PyArray_DATA(b)
-        Py_INCREF(x)
-        cdef void * xp = np.PyArray_DATA(x)
-
-        print("assigned x and b pointers")
-
-        if bp == xp:
-            Py_DECREF(b)
-            Py_DECREF(x)
-            raise PardisoError("b and x must be different arrays")
-
-        cdef int_t nrhs = b.shape[1] if b.ndim == 2 else 1
-
-        if transpose:
-            self.set_iparm(11, 2)
-        else:
-            self.set_iparm(11, 0)
-
-        print("updated iparm 11")
-        try:
-            self._solve(bp, xp, nrhs)
-        except Exception as err:
-            raise err
-        finally:
-            Py_DECREF(b)
-            Py_DECREF(x)
-        return x
-
-    @property
-    def perm(self):
-        """ Fill-reducing permutation vector used inside pardiso.
-        """
-        if self._is_32:
-            return np.array(self._par.perm)
-        else:
-            return np.array(self._par64.perm)
-
-    @property
-    def iparm(self):
-        """ Parameter options for the pardiso solver.
-        """
-        if self._is_32:
-            return np.array(self._par.iparm)
-        else:
-            return np.array(self._par64.iparm)
-
-    def set_iparm(self, int_t i, int_t val):
-        if i > 63 or i < 0:
-            raise IndexError(f"index {i} is out of bounds for size 64 array")
-        if i not in [
-            1, 3, 4, 5, 7, 9, 10, 11, 12, 17, 18, 20, 23,
-            24, 26, 30, 33, 34, 35, 36, 38, 42, 55, 59
-        ]:
-            raise PardisoError(f"cannot set parameter {i} of the iparm array")
-        if self._is_32:
-            self._par.iparm[i] = val
-        else:
-            self._par64.iparm[i] = val
-
-    @property
-    def nnz(self):
-        return self.iparm[17]
-
-    cdef _initialize(self, _par_params par, A, matrix_type, verbose):
-
-        if _par_params is _PardisoParams:
-            int_dtype = f'i{sizeof(int_t)}'
-        else:
-            int_dtype = 'i8'
-        par.n = A.shape[0]
-        print("Initializing permutation array with ", int_dtype, "dtype")
-        par.perm = np.empty(par.n, dtype=int_dtype)
-
-        par.maxfct = 1
-        par.mnum = 1
-
-        par.mtype = matrix_type
-        par.msglvl = verbose
-
-        for i in range(64):
-            par.iparm[i] = 0  # ensure these all start at 0
-
-        # set default parameters
-        par.iparm[0] = 1  # tell pardiso to not reset these values on the first call
-        par.iparm[1] = 2  # The nested dissection algorithm from the METIS
-        par.iparm[3] = 0  # The factorization is always computed as required by phase.
-        par.iparm[4] = 2  # fill perm with computed permutation vector
-        par.iparm[5] = 0  # The array x contains the solution; right-hand side vector b is kept unchanged.
-        par.iparm[7] = 0  # The solver automatically performs two steps of iterative refinement when perterbed pivots are obtained
-        par.iparm[9] = 13 if matrix_type in [11, 13] else 8
-        par.iparm[10] = 1 if matrix_type in [11, 13] else 0
-        par.iparm[11] = 0  # Solve a linear system AX = B (as opposed to A.T or A.H)
-        par.iparm[12] = 1 if matrix_type in [11, 13] else 0
-        par.iparm[17] = -1  # Return the number of non-zeros in this value after first call
-        par.iparm[18] = 0  # do not report flop count
-        par.iparm[20] = 1 if matrix_type in [-2, -4, 6] else 0
-        par.iparm[23] = 0  # classic (not parallel) factorization
-        par.iparm[24] = 0  # default behavoir of parallel solving
-        par.iparm[26] = 0  # Do not check the input matrix
-        #set precision
-        if self._data_type==np.float64 or self._data_type==np.complex128:
-            par.iparm[27] = 0
-        elif self._data_type==np.float32 or self._data_type==np.complex64:
-            par.iparm[27] = 1
-        else:
-            raise TypeError("Unsupported data type")
-        par.iparm[30] = 0  # this would be used to enable sparse input/output for solves
-        par.iparm[33] = 0  # optimal number of thread for CNR mode
-        par.iparm[34] = 1  # zero based indexing
-        par.iparm[35] = 0  # Do not compute schur complement
-        par.iparm[36] = 0  # use CSR storage format
-        par.iparm[38] = 0  # Do not use low rank update
-        par.iparm[42] = 0  # Do not compute the diagonal of the inverse
-        par.iparm[55] = 0  # Internal function used to work with pivot and calculation of diagonal arrays turned off.
-        par.iparm[59] = 0  # operate in-core mode
-
-        par.ia = np.require(A.indptr, dtype=int_dtype, requirements='C')
-        par.ja = np.require(A.indices, dtype=int_dtype, requirements='C')
-
-    cdef _set_A(self, data):
-        self._Adata = np.require(data, requirements='C')
-        self.a = np.PyArray_DATA(data)
-
-    def __dealloc__(self):
-        # Need to call pardiso with phase=-1 to release memory
-        cdef int_t phase=-1, nrhs=0, error=0
-        cdef long_t phase64=-1, nrhs64=0, error64=0
-
-        print("Deallocating myself")
-
-        if self._initialized():
-            print("I was initialized, so call pardiso to release the memory.")
-            with nogil:
-                PyThread_acquire_lock(self.lock, 1)
-                if self._is_32:
-                    pardiso(
-                        self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype,
-                        &phase, &self._par.n, NULL, NULL, NULL, NULL, &nrhs, self._par.iparm,
-                        &self._par.msglvl, NULL, NULL, &error
-                    )
-                else:
-                    pardiso_64(
-                        self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype,
-                        &phase64, &self._par64.n, NULL, NULL, NULL, NULL, &nrhs64,
-                        self._par64.iparm, &self._par64.msglvl, NULL, NULL, &error64
-                    )
-                PyThread_release_lock(self.lock)
-            err = error or error64
-            if err!=0:
-                raise PardisoError("Memory release error "+_err_messages[err])
-            for i in range(64):
-                self.handle[i] = NULL
-
-        if self.lock:
-            print("I had a lock")
-            #dealloc lock
-            PyThread_free_lock(self.lock)
-            self.lock = NULL
-            print("lock is no more lock")
-
-        print("Deallocated myself")
-
-    cdef _analyze(self):
-        #phase = 11
-        with nogil:
-            err = self._run_pardiso(11)
-        if err!=0:
-            raise PardisoError("Analysis step error, "+_err_messages[err])
-
-    cdef _factor(self):
-        #phase = 22
-        self._factored = False
-
-        with nogil:
-            err = self._run_pardiso(22)
-
-        if err!=0:
-            raise PardisoError("Factor step error, "+_err_messages[err])
-
-        self._factored = True
-
-    cdef _solve(self, void* b, void* x, int_t nrhs_in):
-        #phase = 33
-        if(not self._factored):
-            raise PardisoError("Cannot solve without a previous factorization.")
-
-        print("trying to solve")
-
-        with nogil:
-            err = self._run_pardiso(33, b, x, nrhs_in)
-
-        if err!=0:
-            raise PardisoError("Solve step error, "+_err_messages[err])
-
-    @cython.boundscheck(False)
-    cdef int _run_pardiso(self, int_t phase, void* b=NULL, void* x=NULL, int_t nrhs=0) noexcept nogil:
-        cdef int_t error=0
-        cdef long_t error64=0, phase64=phase, nrhs64=nrhs
-
-        PyThread_acquire_lock(self.lock, 1)
-        if self._is_32:
-            with gil:
-                print("Calling pardiso")
-
-                print("maxfct", self._par.maxfct)
-                print("mnum", self._par.mnum)
-                print("mtype", self._par.mtype)
-                print("phase", phase)
-                print("n", self._par.n)
-                print("nrhs", nrhs)
-                print("msglvl", self._par.msglvl)
-                print("ia")
-                n = self._par.n
-                for i in range(n+1):
-                    print(self._par.ia[i])
-                nnz = self._par.ia[n]
-                print("ja")
-                for i in range(nnz):
-                    print(self._par.ja[i])
-
-                print("a")
-                for i in range(nnz):
-                    print(self._Adata[i])
-                if x:
-                    print("x")
-                    for i in range(n):
-                        x[i]
-                if b:
-                    print("b")
-                    for i in range(n):
-                        b[i]
-
-                sys.stdout.flush()
-
-            pardiso(self.handle, &self._par.maxfct, &self._par.mnum, &self._par.mtype,
-                    &phase, &self._par.n, self.a, &self._par.ia[0], &self._par.ja[0],
-                    &self._par.perm[0], &nrhs, self._par.iparm, &self._par.msglvl, b, x, &error)
-
-            with gil:
-                print("Called pardiso, error was ", error)
-                sys.stdout.flush()
-        else:
-            pardiso_64(self.handle, &self._par64.maxfct, &self._par64.mnum, &self._par64.mtype,
-                    &phase64, &self._par64.n, self.a, &self._par64.ia[0], &self._par64.ja[0],
-                    &self._par64.perm[0], &nrhs64, self._par64.iparm, &self._par64.msglvl, b, x, &error64)
-        PyThread_release_lock(self.lock)
-        error = error or error64
-        return error
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index e0d3b8b..f06abe0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ name = 'pydiso'
 dynamic = ["version"]
 description = "Wrapper for intel's pardiso implementation in the MKL"
 readme = 'README.md'
-requires-python = '>=3.8'
+requires-python = '>=3.10'
 authors = [
   {name = 'SimPEG developers', email = 'josephrcapriotti@gmail.com'},
 ]
diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py
index 5f65254..ebf3961 100644
--- a/tests/test_pydiso.py
+++ b/tests/test_pydiso.py
@@ -25,8 +25,8 @@
 Uc = sp.diags([(2+2j), -(1+1j)], [0, 1], (n, n))
 U2c = sp.diags([(2+2j), -(1+1j)], [0, 2], (n, n))
 
-xr = np.random.rand(n)
-xc = np.random.rand(n) + np.random.rand(n)*1j
+xr = np.linspace(-10, 10, n)
+xc = np.linspace(-20, 20, n) + np.linspace(20, -20, n)*1j
 
 A_real_dict = {'real_structurally_symmetric': L@U,
                'real_symmetric_positive_definite': L@L.T,
@@ -35,8 +35,8 @@
                }
 A_complex_dict = {'complex_structurally_symmetric': Lc@Uc,
                   'complex_hermitian_positive_definite': Lc@Lc.T.conjugate(),
-                  'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(),
-                  'complex_symmetric': Lc@Lc.T,
+                  #'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(),
+                  #'complex_symmetric': Lc@Lc.T,
                   'complex_nonsymmetric': Lc@U2c
                   }
 
@@ -86,7 +86,6 @@ def test_version():
 @pytest.mark.parametrize("A, matrix_type", inputs)
 def test_solver(A, matrix_type):
     dtype = A.dtype
-    print(dtype)
     if np.issubdtype(dtype, np.complexfloating):
         x = xc.astype(dtype)
     else:
@@ -94,11 +93,11 @@ def test_solver(A, matrix_type):
     b = A@x
 
     solver = Solver(A, matrix_type=matrix_type)
-    sys.stdout.flush()
+    A_valid = sp.csr_matrix((solver._data, solver._indices, solver._indptr))
     x2 = solver.solve(b)
 
     eps = np.finfo(dtype).eps
-    np.testing.assert_allclose(x, x2, atol=1E3*eps)
+    np.testing.assert_allclose(x, x2, atol=2E3*eps)
 
 @pytest.mark.parametrize("A, matrix_type", inputs)
 def test_transpose_solver(A, matrix_type):
@@ -113,7 +112,7 @@ def test_transpose_solver(A, matrix_type):
     x2 = solver.solve(b, transpose=True)
 
     eps = np.finfo(dtype).eps
-    np.testing.assert_allclose(x, x2, atol=1E3*eps)
+    np.testing.assert_allclose(x, x2, atol=2E3*eps)
 
 def test_multiple_RHS():
     A = A_real_dict["real_symmetric_positive_definite"]
@@ -124,16 +123,16 @@ def test_multiple_RHS():
     x2 = solver.solve(b)
 
     eps = np.finfo(np.float64).eps
-    np.testing.assert_allclose(x, x2, atol=1E3*eps)
+    np.testing.assert_allclose(x, x2, atol=2E3*eps)
 
 
 def test_matrix_type_errors():
     A = A_real_dict["real_symmetric_positive_definite"]
-    with pytest.raises(TypeError):
+    with pytest.raises(ValueError):
         solver = Solver(A, matrix_type="complex_hermitian_positive_definite")
 
     A = A_complex_dict["complex_structurally_symmetric"]
-    with pytest.raises(TypeError):
+    with pytest.raises(ValueError):
         solver = Solver(A, matrix_type="real_symmetric_positive_definite")
 
 

From 8dc1d4eca4e0c263c3f1d96ed147a4f32e986c8e Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Tue, 24 Sep 2024 09:30:47 -0600
Subject: [PATCH 23/25] remove a few call signatures from doc strings

---
 pydiso/mkl_solver.py | 57 +++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 35 deletions(-)

diff --git a/pydiso/mkl_solver.py b/pydiso/mkl_solver.py
index a4cf6a7..38113be 100644
--- a/pydiso/mkl_solver.py
+++ b/pydiso/mkl_solver.py
@@ -36,23 +36,10 @@ class PardisoTypeConversionWarning(
         PardisoWarning, sp.SparseEfficiencyWarning):
     pass
 
-
-def _ensure_csr(A, sym=False):
-    if not (sp.isspmatrix_csr(A)):
-        if sym and sp.isspmatrix_csc(A):
-            A = A.T
-        else:
-            warnings.warn("Converting %s matrix to CSR format."
-                         %A.__class__.__name__, PardisoTypeConversionWarning)
-            A = A.tocsr()
-    return A
-
-
 class MKLPardisoSolver:
 
     def __init__(self, A, matrix_type=None, factor=True, verbose=False):
-        '''ParidsoSolver(A, matrix_type=None, factor=True, verbose=False)
-        An interface to the intel MKL pardiso sparse matrix solver.
+        '''An interface to the Intel MKL pardiso sparse matrix solver.
 
         This is a solver class for a scipy sparse matrix using the Pardiso sparse
         solver in the Intel Math Kernel Library.
@@ -68,7 +55,7 @@ def __init__(self, A, matrix_type=None, factor=True, verbose=False):
         A : scipy.sparse.spmatrix
             A sparse matrix preferably in a CSR format.
         matrix_type : str, int, or None, optional
-            A string describing the matrix type, or it's corresponding int code.
+            A string describing the matrix type, or its corresponding integer code.
             If None, then assumed to be nonsymmetric matrix.
         factor : bool, optional
             Whether to perform the factorization stage upon instantiation of the class.
@@ -157,7 +144,7 @@ def __init__(self, A, matrix_type=None, factor=True, verbose=False):
         self.matrix_type = matrix_type
 
         indptr = np.asarray(A.indptr)  # double check it's a numpy array
-        mkl_int_size= get_mkl_int_size()
+        mkl_int_size = get_mkl_int_size()
         mkl_int64_size = get_mkl_int64_size()
 
         target_int_size = mkl_int_size if indptr.itemsize <= mkl_int_size else mkl_int64_size
@@ -180,14 +167,11 @@ def __init__(self, A, matrix_type=None, factor=True, verbose=False):
             self._factor()
 
     def refactor(self, A):
-        """solver.refactor(A)
-        re-use a symbolic factorization with a new `A` matrix.
+        """Reuse a symbolic factorization with a new matrix.
 
         Note
         ----
         Must have the same non-zero pattern as the initial `A` matrix.
-        If `full_refactor=False`, the initial factorization is used as a
-        preconditioner to a Krylov subspace solver in the solve step.
 
         Parameters
         ----------
@@ -212,14 +196,7 @@ def __call__(self, b):
         return self.solve(b)
 
     def solve(self, b, x=None, transpose=False):
-        """solve(self, b, x=None, transpose=False)
-        Solves the equation AX=B using the factored A matrix
-
-        Note
-        ----
-        The data will be copied if not contiguous in all cases. If multiple rhs
-        are given, the input arrays will be copied if not in a contiguous
-        Fortran order.
+        """Solves the equation AX=B using the factored A matrix
 
         Parameters
         ----------
@@ -234,12 +211,15 @@ def solve(self, b, x=None, transpose=False):
 
         Returns
         -------
-        numpy array
+        numpy.ndarray
             array containing the solution (in Fortran ordering)
-        """
-        if(not self._factored):
-            raise PardisoError("Cannot solve without a previous factorization.")
 
+        Notes
+        -----
+        The data will be copied if not contiguous in all cases. If multiple rhs
+        are given, the input arrays will be copied if not in a contiguous
+        Fortran order.
+        """
         if b.dtype != self._data_dtype:
             warnings.warn("rhs does not have the same data type as A",
                             PardisoTypeConversionWarning)
@@ -279,6 +259,9 @@ def solve(self, b, x=None, transpose=False):
         if x is b or (x.base is not None and (x.base is b.base)):
             raise ValueError("x and b cannot point to the same memory")
 
+        if not self._factored:
+            self._factor()
+
         self._handle.set_iparm(11, 2 if transpose else 0)
 
         phase = 33
@@ -308,7 +291,11 @@ def _validate_matrix(self, mat):
             if sp.isspmatrix_csc(mat):
                 mat = mat.T  # Transpose to get a CSR matrix since it's symmetric
             mat = sp.triu(mat, format='csr')
-        mat = _ensure_csr(mat)
+
+        if not (sp.isspmatrix_csr(mat)):
+            warnings.warn("Converting %s matrix to CSR format."
+                          % mat.__class__.__name__, PardisoTypeConversionWarning)
+            mat = mat.tocsr()
         mat.sort_indices()
         mat.sum_duplicates()
 
@@ -335,7 +322,7 @@ def nnz(self):
 
     def _analyze(self):
         phase = 11
-        xb_dummy = np.empty([1,1], dtype=self._data_dtype)
+        xb_dummy = np.empty([1, 1], dtype=self._data_dtype)
         error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy)
         if error:
             raise PardisoError("Analysis step error, "+_err_messages[error])
@@ -343,7 +330,7 @@ def _analyze(self):
     def _factor(self):
         phase = 22
         self._factored = False
-        xb_dummy = np.empty([1,1], dtype=self._data_dtype)
+        xb_dummy = np.empty([1, 1], dtype=self._data_dtype)
         error = self._handle.call_pardiso(phase, self._data, self._indptr, self._indices, xb_dummy, xb_dummy)
 
         if error:

From a4eb41a483d248e078c8a13a7470e68245a9fc30 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Tue, 24 Sep 2024 09:36:46 -0600
Subject: [PATCH 24/25] add complex tests back in

---
 tests/test_pydiso.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_pydiso.py b/tests/test_pydiso.py
index ebf3961..624d50f 100644
--- a/tests/test_pydiso.py
+++ b/tests/test_pydiso.py
@@ -35,8 +35,8 @@
                }
 A_complex_dict = {'complex_structurally_symmetric': Lc@Uc,
                   'complex_hermitian_positive_definite': Lc@Lc.T.conjugate(),
-                  #'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(),
-                  #'complex_symmetric': Lc@Lc.T,
+                  'complex_hermitian_indefinite': Lc@D@Lc.T.conjugate(),
+                  'complex_symmetric': Lc@Lc.T,
                   'complex_nonsymmetric': Lc@U2c
                   }
 

From e017d0e504a3bc46dbb32491b1025afcfb215395 Mon Sep 17 00:00:00 2001
From: Joseph Capriotti <josephrcapriotti@gmail.com>
Date: Tue, 24 Sep 2024 09:44:13 -0600
Subject: [PATCH 25/25] update distribute workflow

---
 .github/workflows/python-package-conda.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index bfe46df..406d643 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -70,7 +70,6 @@ jobs:
       run: |
         ${{ matrix.coverage && 'coverage run -m' || '' }} pytest -s -v
         ${{ matrix.coverage && 'coverage xml' || '' }}
-#      continue-on-error: ${{ matrix.os == 'windows-latest' && matrix.mkl-version == '2024'}}
 
     - name: Upload coverage
       if: ${{ matrix.coverage }}
@@ -93,9 +92,8 @@ jobs:
     - name: Setup Conda
       uses: conda-incubator/setup-miniconda@v3
       with:
-        python-version: 3.11
-        mamba-version: '*'
-        channels: conda-forge, defaults
+        python-version: "3.11"
+        channels: defaults
         channel-priority: true
         activate-environment: dev