1+ #cython: language_level=3
2+ cimport numpy as np
3+ import cython
4+ from cpython.pythread cimport (
5+ PyThread_type_lock,
6+ PyThread_allocate_lock,
7+ PyThread_acquire_lock,
8+ PyThread_release_lock,
9+ PyThread_free_lock
10+ )
11+
12+ import numpy as np
13+ import os
14+
15+ # We use np.PyArray_DATA to grab the pointer
16+ # to a numpy array.
17+ np.import_array()
18+
19+ cdef extern from 'mkl.h':
20+ ctypedef long long MKL_INT64
21+ ctypedef int MKL_INT
22+
23+ ctypedef MKL_INT int_t
24+ ctypedef MKL_INT64 long_t
25+
26+ cdef extern from 'mkl.h':
27+ int MKL_DOMAIN_PARDISO
28+
29+ ctypedef struct MKLVersion:
30+ int MajorVersion
31+ int MinorVersion
32+ int UpdateVersion
33+ char * ProductStatus
34+ char * Build
35+ char * Processor
36+ char * Platform
37+
38+ void mkl_get_version(MKLVersion* pv)
39+
40+ void mkl_set_num_threads(int nth)
41+ int mkl_domain_set_num_threads(int nt, int domain)
42+ int mkl_get_max_threads()
43+ int mkl_domain_get_max_threads(int domain)
44+
45+ ctypedef int (*ProgressEntry)(int* thread, int* step, char* stage, int stage_len) except? -1;
46+ ProgressEntry mkl_set_progress(ProgressEntry progress);
47+
48+ ctypedef void * _MKL_DSS_HANDLE_t
49+
50+ void pardiso(_MKL_DSS_HANDLE_t, const int_t*, const int_t*, const int_t*,
51+ const int_t *, const int_t *, const void *, const int_t *,
52+ const int_t *, int_t *, const int_t *, int_t *,
53+ const int_t *, void *, void *, int_t *) nogil
54+
55+ void pardiso_64(_MKL_DSS_HANDLE_t, const long_t *, const long_t *, const long_t *,
56+ const long_t *, const long_t *, const void *, const long_t *,
57+ const long_t *, long_t *, const long_t *, long_t *,
58+ const long_t *, void *, void *, long_t *) nogil
59+
60+
61+ _err_messages = {0:"no error",
62+ -1:'input inconsistent',
63+ -2:'not enough memory',
64+ -3:'reordering problem',
65+ -4:'zero pivot, numerical factorization or iterative refinement problem',
66+ -5:'unclassified (internal) error',
67+ -6:'reordering failed',
68+ -7:'diagonal matrix is singular',
69+ -8:'32-bit integer overflow problem',
70+ -9:'not enough memory for OOC',
71+ -10:'error opening OOC files',
72+ -11:'read/write error with OOC files',
73+ -12:'pardiso_64 called from 32-bit library',
74+ }
75+
76+ class PardisoError(Exception):
77+ pass
78+
79+ class PardisoWarning(UserWarning):
80+ pass
81+
82+
83+ #call pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, perm, nrhs, iparm, msglvl, b, x, error)
84+ cdef int mkl_progress(int *thread, int* step, char* stage, int stage_len) nogil:
85+ # must be a nogil process to pass to mkl pardiso progress reporting
86+ with gil:
87+ # must reacquire the gil to print out back to python.
88+ print(thread[0], step[0], stage, stage_len)
89+ return 0
90+
91+ cdef int mkl_no_progress(int *thread, int* step, char* stage, int stage_len) nogil:
92+ return 0
93+
94+
95+ def get_mkl_max_threads():
96+ """
97+ Returns the current number of openMP threads available to the MKL Library
98+ """
99+ return mkl_get_max_threads()
100+
101+ def get_mkl_pardiso_max_threads():
102+ """
103+ Returns the current number of openMP threads available to the Pardiso functions
104+ """
105+ return mkl_domain_get_max_threads(MKL_DOMAIN_PARDISO)
106+
107+ def set_mkl_threads(num_threads=None):
108+ """
109+ Sets the number of openMP threads available to the MKL library.
110+
111+ Parameters
112+ ----------
113+ num_threads : None or int
114+ number of threads to use for the MKL library.
115+ None will set the number of threads to that returned by `os.cpu_count()`.
116+ """
117+ if num_threads is None:
118+ num_threads = os.cpu_count()
119+ elif num_threads<=0:
120+ raise ValueError('Number of threads must be greater than 0')
121+ mkl_set_num_threads(num_threads)
122+
123+ def set_mkl_pardiso_threads(num_threads=None):
124+ """
125+ Sets the number of openMP threads available to the Pardiso functions
126+
127+ Parameters
128+ ----------
129+ num_threads : None or int
130+ Number of threads to use for the MKL Pardiso routines.
131+ None (or 0) will set the number of threads to `get_mkl_max_threads`
132+ """
133+ if num_threads is None:
134+ num_threads = 0
135+ elif num_threads<0:
136+ raise ValueError('Number of threads must be greater than 0')
137+ mkl_domain_set_num_threads(num_threads, MKL_DOMAIN_PARDISO)
138+
139+ def get_mkl_version():
140+ """
141+ Returns a dictionary describing the version of Intel Math Kernel Library used
142+ """
143+ cdef MKLVersion vers
144+ mkl_get_version(&vers)
145+ return vers
146+
147+ def get_mkl_int_size():
148+ """Return the size of the MKL_INT at compile time in bytes.
149+
150+ Returns
151+ -------
152+ int
153+ """
154+ return sizeof(MKL_INT)
155+
156+
157+ def get_mkl_int64_size():
158+ """Return the size of the MKL_INT64 at compile time in bytes.
159+
160+ Returns
161+ -------
162+ int
163+ """
164+ return sizeof(MKL_INT64)
165+
166+
167+
168+ ctypedef fused real_or_complex:
169+ np.float32_t
170+ np.float64_t
171+ np.complex64_t
172+ np.complex128_t
173+
174+
175+ {{for int_type in ["int_t", "long_t"]}}
176+ cdef class _PardisoHandle_{{int_type}}:
177+ cdef _MKL_DSS_HANDLE_t handle[64]
178+ cdef PyThread_type_lock lock
179+
180+ cdef {{int_type}} n, maxfct, mnum, msglvl
181+ cdef public {{int_type}} matrix_type
182+ cdef public {{int_type}}[64] iparm
183+ cdef public {{int_type}}[:] perm
184+
185+ @cython.boundscheck(False)
186+ def __cinit__(self, A_dat_dtype, n, matrix_type, maxfct, mnum, msglvl):
187+ self.lock = PyThread_allocate_lock()
188+
189+ np_int_dtype = np.dtype(f"i{sizeof({{int_type}})}")
190+
191+ for i in range(64):
192+ self.handle[i] = NULL
193+
194+ self.n = n
195+ self.matrix_type = matrix_type
196+ self.maxfct = maxfct
197+ self.mnum = mnum
198+ self.msglvl = msglvl
199+
200+ if self.msglvl:
201+ #for reporting factorization progress via python's `print`
202+ mkl_set_progress(mkl_progress)
203+ else:
204+ mkl_set_progress(mkl_no_progress)
205+
206+ is_single_precision = np.issubdtype(A_dat_dtype, np.single) or np.issubdtype(A_dat_dtype, np.csingle)
207+
208+ self.perm = np.empty(self.n, dtype=np_int_dtype)
209+
210+ for i in range(64):
211+ self.iparm[i] = 0 # ensure these all start at 0
212+
213+ # set default parameters
214+ self.iparm[0] = 1 # tell pardiso to not reset these values on the first call
215+ self.iparm[1] = 2 # The nested dissection algorithm from the METIS
216+ self.iparm[3] = 0 # The factorization is always computed as required by phase.
217+ self.iparm[4] = 2 # fill perm with computed permutation vector
218+ self.iparm[5] = 0 # The array x contains the solution; right-hand side vector b is kept unchanged.
219+ self.iparm[7] = 0 # The solver automatically performs two steps of iterative refinement when perterbed pivots are obtained
220+ self.iparm[9] = 13 if matrix_type in [11, 13] else 8
221+ self.iparm[10] = 1 if matrix_type in [11, 13] else 0
222+ self.iparm[11] = 0 # Solve a linear system AX = B (as opposed to A.T or A.H)
223+ self.iparm[12] = 1 if matrix_type in [11, 13] else 0
224+ self.iparm[17] = -1 # Return the number of non-zeros in this value after first call
225+ self.iparm[18] = 0 # do not report flop count
226+ self.iparm[20] = 1 if matrix_type in [-2, -4, 6] else 0
227+ self.iparm[23] = 0 # classic (not parallel) factorization
228+ self.iparm[24] = 0 # default behavoir of parallel solving
229+ self.iparm[26] = 1 # Do not check the input matrix
230+ self.iparm[27] = is_single_precision # 1 if single, 0 if double
231+ self.iparm[30] = 0 # this would be used to enable sparse input/output for solves
232+ self.iparm[33] = 0 # optimal number of thread for CNR mode
233+ self.iparm[34] = 1 # zero based indexing
234+ self.iparm[35] = 0 # Do not compute schur complement
235+ self.iparm[36] = 0 # use CSR storage format
236+ self.iparm[38] = 0 # Do not use low rank update
237+ self.iparm[42] = 0 # Do not compute the diagonal of the inverse
238+ self.iparm[55] = 0 # Internal function used to work with pivot and calculation of diagonal arrays turned off.
239+ self.iparm[59] = 0 # operate in-core mode
240+
241+ def initialized(self):
242+ return self._initialized()
243+
244+ cdef int _initialized(self) noexcept nogil:
245+ # If any of the handle pointers are not null, return 1
246+ cdef int i
247+ for i in range(64):
248+ if self.handle[i]:
249+ return 1
250+ return 0
251+
252+ def set_iparm(self, {{int_type}} i, {{int_type}} val):
253+ self.iparm[i] = val
254+
255+ @cython.boundscheck(False)
256+ cpdef {{int_type}} call_pardiso(self,
257+ {{int_type}} phase,
258+ real_or_complex[::1] a_data,
259+ {{int_type}}[::1] a_indptr,
260+ {{int_type}}[::1] a_indices,
261+ real_or_complex[::1, :] rhs,
262+ real_or_complex[::1, :] out
263+ ):
264+ cdef {{int_type}} error, nrhs
265+ with nogil:
266+ nrhs = rhs.shape[1]
267+ PyThread_acquire_lock(self.lock, mode=1)
268+ pardiso{{if int_type == "long_t"}}_64{{endif}}(
269+ self.handle, &self.maxfct, &self.mnum, &self.matrix_type, &phase, &self.n,
270+ &a_data[0], &a_indptr[0], &a_indices[0], &self.perm[0],
271+ &nrhs, self.iparm, &self.msglvl,
272+ &rhs[0, 0], &out[0, 0], &error
273+ )
274+ PyThread_release_lock(self.lock)
275+ return error
276+
277+ @cython.boundscheck(False)
278+ def __dealloc__(self):
279+ # Need to call pardiso with phase=-1 to release memory (if it was initialized)
280+ cdef {{int_type}} phase = -1, nrhs = 0, error = 0
281+
282+ with nogil:
283+ PyThread_acquire_lock(self.lock, mode=1)
284+ if self._initialized():
285+ pardiso{{if int_type == "long_t"}}_64{{endif}}(
286+ self.handle, &self.maxfct, &self.mnum, &self.matrix_type,
287+ &phase, &self.n, NULL, NULL, NULL, NULL, &nrhs, self.iparm,
288+ &self.msglvl, NULL, NULL, &error)
289+ if error == 0:
290+ for i in range(64):
291+ self.handle[i] = NULL
292+ PyThread_release_lock(self.lock)
293+ if error != 0:
294+ raise MemoryError("Pardiso Memory release error: " + _err_messages[error])
295+ if self.lock:
296+ #deallocate the lock
297+ PyThread_free_lock(self.lock)
298+ self.lock = NULL
299+ {{endfor}}
0 commit comments