Skip to content

Commit 1c1b8bd

Browse files
committed
Merge branch 're-configure_interface' into ci_maintenance
2 parents 00871be + cd7ce76 commit 1c1b8bd

7 files changed

Lines changed: 685 additions & 718 deletions

File tree

.github/workflows/python-package-conda.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,16 @@ jobs:
5454
conda install --quiet --yes -c conda-forge \
5555
pip numpy scipy cython mkl=${{ matrix.mkl-version }} pytest \
5656
mkl-devel pkg-config meson-python meson ninja setuptools_scm \
57-
${{ matrix.coverage && 'coverage' || ''}}
57+
${{ matrix.coverage && 'coverage' || ''}} \
58+
${{ matrix.os == 'windows-latest' && '"libblas=*=*mkl"' || ''}}
5859
5960
- name: Install Our Package
6061
run: |
6162
python -m pip install --no-build-isolation --verbose --editable . \
6263
--config-setting=compile-args=-v \
6364
${{ matrix.coverage && '--config-settings=setup-args="-Db_coverage=true"' || ''}} \
64-
${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="--vsenv"' || ''}}
65+
${{ matrix.os == 'windows-latest' && '--config-settings=setup-args="-Dvsenv=true"' || ''}}
66+
6567
conda list
6668
6769
- name: Run Tests

pydiso/_mkl_solver.pyx.in

Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
#cython: language_level=3
2+
cimport numpy as np
3+
import cython
4+
from cpython.pythread cimport (
5+
PyThread_type_lock,
6+
PyThread_allocate_lock,
7+
PyThread_acquire_lock,
8+
PyThread_release_lock,
9+
PyThread_free_lock
10+
)
11+
12+
import numpy as np
13+
import os
14+
15+
# We use np.PyArray_DATA to grab the pointer
16+
# to a numpy array.
17+
np.import_array()
18+
19+
cdef extern from 'mkl.h':
20+
ctypedef long long MKL_INT64
21+
ctypedef int MKL_INT
22+
23+
ctypedef MKL_INT int_t
24+
ctypedef MKL_INT64 long_t
25+
26+
cdef extern from 'mkl.h':
27+
int MKL_DOMAIN_PARDISO
28+
29+
ctypedef struct MKLVersion:
30+
int MajorVersion
31+
int MinorVersion
32+
int UpdateVersion
33+
char * ProductStatus
34+
char * Build
35+
char * Processor
36+
char * Platform
37+
38+
void mkl_get_version(MKLVersion* pv)
39+
40+
void mkl_set_num_threads(int nth)
41+
int mkl_domain_set_num_threads(int nt, int domain)
42+
int mkl_get_max_threads()
43+
int mkl_domain_get_max_threads(int domain)
44+
45+
ctypedef int (*ProgressEntry)(int* thread, int* step, char* stage, int stage_len) except? -1;
46+
ProgressEntry mkl_set_progress(ProgressEntry progress);
47+
48+
ctypedef void * _MKL_DSS_HANDLE_t
49+
50+
void pardiso(_MKL_DSS_HANDLE_t, const int_t*, const int_t*, const int_t*,
51+
const int_t *, const int_t *, const void *, const int_t *,
52+
const int_t *, int_t *, const int_t *, int_t *,
53+
const int_t *, void *, void *, int_t *) nogil
54+
55+
void pardiso_64(_MKL_DSS_HANDLE_t, const long_t *, const long_t *, const long_t *,
56+
const long_t *, const long_t *, const void *, const long_t *,
57+
const long_t *, long_t *, const long_t *, long_t *,
58+
const long_t *, void *, void *, long_t *) nogil
59+
60+
61+
_err_messages = {0:"no error",
62+
-1:'input inconsistent',
63+
-2:'not enough memory',
64+
-3:'reordering problem',
65+
-4:'zero pivot, numerical factorization or iterative refinement problem',
66+
-5:'unclassified (internal) error',
67+
-6:'reordering failed',
68+
-7:'diagonal matrix is singular',
69+
-8:'32-bit integer overflow problem',
70+
-9:'not enough memory for OOC',
71+
-10:'error opening OOC files',
72+
-11:'read/write error with OOC files',
73+
-12:'pardiso_64 called from 32-bit library',
74+
}
75+
76+
class PardisoError(Exception):
77+
pass
78+
79+
class PardisoWarning(UserWarning):
80+
pass
81+
82+
83+
#call pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, perm, nrhs, iparm, msglvl, b, x, error)
84+
cdef int mkl_progress(int *thread, int* step, char* stage, int stage_len) nogil:
85+
# must be a nogil process to pass to mkl pardiso progress reporting
86+
with gil:
87+
# must reacquire the gil to print out back to python.
88+
print(thread[0], step[0], stage, stage_len)
89+
return 0
90+
91+
cdef int mkl_no_progress(int *thread, int* step, char* stage, int stage_len) nogil:
92+
return 0
93+
94+
95+
def get_mkl_max_threads():
96+
"""
97+
Returns the current number of openMP threads available to the MKL Library
98+
"""
99+
return mkl_get_max_threads()
100+
101+
def get_mkl_pardiso_max_threads():
102+
"""
103+
Returns the current number of openMP threads available to the Pardiso functions
104+
"""
105+
return mkl_domain_get_max_threads(MKL_DOMAIN_PARDISO)
106+
107+
def set_mkl_threads(num_threads=None):
108+
"""
109+
Sets the number of openMP threads available to the MKL library.
110+
111+
Parameters
112+
----------
113+
num_threads : None or int
114+
number of threads to use for the MKL library.
115+
None will set the number of threads to that returned by `os.cpu_count()`.
116+
"""
117+
if num_threads is None:
118+
num_threads = os.cpu_count()
119+
elif num_threads<=0:
120+
raise ValueError('Number of threads must be greater than 0')
121+
mkl_set_num_threads(num_threads)
122+
123+
def set_mkl_pardiso_threads(num_threads=None):
124+
"""
125+
Sets the number of openMP threads available to the Pardiso functions
126+
127+
Parameters
128+
----------
129+
num_threads : None or int
130+
Number of threads to use for the MKL Pardiso routines.
131+
None (or 0) will set the number of threads to `get_mkl_max_threads`
132+
"""
133+
if num_threads is None:
134+
num_threads = 0
135+
elif num_threads<0:
136+
raise ValueError('Number of threads must be greater than 0')
137+
mkl_domain_set_num_threads(num_threads, MKL_DOMAIN_PARDISO)
138+
139+
def get_mkl_version():
140+
"""
141+
Returns a dictionary describing the version of Intel Math Kernel Library used
142+
"""
143+
cdef MKLVersion vers
144+
mkl_get_version(&vers)
145+
return vers
146+
147+
def get_mkl_int_size():
148+
"""Return the size of the MKL_INT at compile time in bytes.
149+
150+
Returns
151+
-------
152+
int
153+
"""
154+
return sizeof(MKL_INT)
155+
156+
157+
def get_mkl_int64_size():
158+
"""Return the size of the MKL_INT64 at compile time in bytes.
159+
160+
Returns
161+
-------
162+
int
163+
"""
164+
return sizeof(MKL_INT64)
165+
166+
167+
168+
ctypedef fused real_or_complex:
169+
np.float32_t
170+
np.float64_t
171+
np.complex64_t
172+
np.complex128_t
173+
174+
175+
{{for int_type in ["int_t", "long_t"]}}
176+
cdef class _PardisoHandle_{{int_type}}:
177+
cdef _MKL_DSS_HANDLE_t handle[64]
178+
cdef PyThread_type_lock lock
179+
180+
cdef {{int_type}} n, maxfct, mnum, msglvl
181+
cdef public {{int_type}} matrix_type
182+
cdef public {{int_type}}[64] iparm
183+
cdef public {{int_type}}[:] perm
184+
185+
@cython.boundscheck(False)
186+
def __cinit__(self, A_dat_dtype, n, matrix_type, maxfct, mnum, msglvl):
187+
self.lock = PyThread_allocate_lock()
188+
189+
np_int_dtype = np.dtype(f"i{sizeof({{int_type}})}")
190+
191+
for i in range(64):
192+
self.handle[i] = NULL
193+
194+
self.n = n
195+
self.matrix_type = matrix_type
196+
self.maxfct = maxfct
197+
self.mnum = mnum
198+
self.msglvl = msglvl
199+
200+
if self.msglvl:
201+
#for reporting factorization progress via python's `print`
202+
mkl_set_progress(mkl_progress)
203+
else:
204+
mkl_set_progress(mkl_no_progress)
205+
206+
is_single_precision = np.issubdtype(A_dat_dtype, np.single) or np.issubdtype(A_dat_dtype, np.csingle)
207+
208+
self.perm = np.empty(self.n, dtype=np_int_dtype)
209+
210+
for i in range(64):
211+
self.iparm[i] = 0 # ensure these all start at 0
212+
213+
# set default parameters
214+
self.iparm[0] = 1 # tell pardiso to not reset these values on the first call
215+
self.iparm[1] = 2 # The nested dissection algorithm from the METIS
216+
self.iparm[3] = 0 # The factorization is always computed as required by phase.
217+
self.iparm[4] = 2 # fill perm with computed permutation vector
218+
self.iparm[5] = 0 # The array x contains the solution; right-hand side vector b is kept unchanged.
219+
self.iparm[7] = 0 # The solver automatically performs two steps of iterative refinement when perterbed pivots are obtained
220+
self.iparm[9] = 13 if matrix_type in [11, 13] else 8
221+
self.iparm[10] = 1 if matrix_type in [11, 13] else 0
222+
self.iparm[11] = 0 # Solve a linear system AX = B (as opposed to A.T or A.H)
223+
self.iparm[12] = 1 if matrix_type in [11, 13] else 0
224+
self.iparm[17] = -1 # Return the number of non-zeros in this value after first call
225+
self.iparm[18] = 0 # do not report flop count
226+
self.iparm[20] = 1 if matrix_type in [-2, -4, 6] else 0
227+
self.iparm[23] = 0 # classic (not parallel) factorization
228+
self.iparm[24] = 0 # default behavoir of parallel solving
229+
self.iparm[26] = 1 # Do not check the input matrix
230+
self.iparm[27] = is_single_precision # 1 if single, 0 if double
231+
self.iparm[30] = 0 # this would be used to enable sparse input/output for solves
232+
self.iparm[33] = 0 # optimal number of thread for CNR mode
233+
self.iparm[34] = 1 # zero based indexing
234+
self.iparm[35] = 0 # Do not compute schur complement
235+
self.iparm[36] = 0 # use CSR storage format
236+
self.iparm[38] = 0 # Do not use low rank update
237+
self.iparm[42] = 0 # Do not compute the diagonal of the inverse
238+
self.iparm[55] = 0 # Internal function used to work with pivot and calculation of diagonal arrays turned off.
239+
self.iparm[59] = 0 # operate in-core mode
240+
241+
def initialized(self):
242+
return self._initialized()
243+
244+
cdef int _initialized(self) noexcept nogil:
245+
# If any of the handle pointers are not null, return 1
246+
cdef int i
247+
for i in range(64):
248+
if self.handle[i]:
249+
return 1
250+
return 0
251+
252+
def set_iparm(self, {{int_type}} i, {{int_type}} val):
253+
self.iparm[i] = val
254+
255+
@cython.boundscheck(False)
256+
cpdef {{int_type}} call_pardiso(self,
257+
{{int_type}} phase,
258+
real_or_complex[::1] a_data,
259+
{{int_type}}[::1] a_indptr,
260+
{{int_type}}[::1] a_indices,
261+
real_or_complex[::1, :] rhs,
262+
real_or_complex[::1, :] out
263+
):
264+
cdef {{int_type}} error, nrhs
265+
with nogil:
266+
nrhs = rhs.shape[1]
267+
PyThread_acquire_lock(self.lock, mode=1)
268+
pardiso{{if int_type == "long_t"}}_64{{endif}}(
269+
self.handle, &self.maxfct, &self.mnum, &self.matrix_type, &phase, &self.n,
270+
&a_data[0], &a_indptr[0], &a_indices[0], &self.perm[0],
271+
&nrhs, self.iparm, &self.msglvl,
272+
&rhs[0, 0], &out[0, 0], &error
273+
)
274+
PyThread_release_lock(self.lock)
275+
return error
276+
277+
@cython.boundscheck(False)
278+
def __dealloc__(self):
279+
# Need to call pardiso with phase=-1 to release memory (if it was initialized)
280+
cdef {{int_type}} phase = -1, nrhs = 0, error = 0
281+
282+
with nogil:
283+
PyThread_acquire_lock(self.lock, mode=1)
284+
if self._initialized():
285+
pardiso{{if int_type == "long_t"}}_64{{endif}}(
286+
self.handle, &self.maxfct, &self.mnum, &self.matrix_type,
287+
&phase, &self.n, NULL, NULL, NULL, NULL, &nrhs, self.iparm,
288+
&self.msglvl, NULL, NULL, &error)
289+
if error == 0:
290+
for i in range(64):
291+
self.handle[i] = NULL
292+
PyThread_release_lock(self.lock)
293+
if error != 0:
294+
raise MemoryError("Pardiso Memory release error: " + _err_messages[error])
295+
if self.lock:
296+
#deallocate the lock
297+
PyThread_free_lock(self.lock)
298+
self.lock = NULL
299+
{{endfor}}

pydiso/meson.build

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
cython_file = custom_target(
2+
input: '_mkl_solver.pyx.in',
3+
output: '_mkl_solver.pyx',
4+
command: [py,
5+
'-c',
6+
'''
7+
import sys
8+
from pathlib import Path
9+
from Cython.Tempita import sub
10+
11+
template = Path(sys.argv[1]).read_text("utf8")
12+
output = sub(template)
13+
Path(sys.argv[2]).write_text(output, "utf8")
14+
''', '@INPUT@', '@OUTPUT@']
15+
)
16+
117
# NumPy include directory
218
np_dep = dependency('numpy')
319
numpy_nodepr_api = ['-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION']
@@ -15,7 +31,6 @@ endif
1531
# MKL-specific options
1632
_threading_opt = get_option('mkl-threading')
1733
if _threading_opt == 'auto'
18-
# openmp.pc not included with conda-forge distribution (yet)
1934
mkl_dep_name += '-seq'
2035
else
2136
mkl_dep_name += '-' + _threading_opt
@@ -45,37 +60,13 @@ else
4560

4661
endif
4762

48-
# Deal with M_PI & friends; add `use_math_defines` to c_args or cpp_args
49-
# Cython doesn't always get this right itself (see, e.g., gh-16800), so
50-
# explicitly add the define as a compiler flag for Cython-generated code.
51-
is_windows = host_machine.system() == 'windows'
52-
if is_windows
53-
use_math_defines = ['-D_USE_MATH_DEFINES']
54-
else
55-
use_math_defines = []
56-
endif
57-
5863
c_undefined_ok = ['-Wno-maybe-uninitialized']
59-
cython_c_args = [numpy_nodepr_api, use_math_defines, '-DCYTHON_CCOMPLEX=0']
60-
61-
cython_file = 'mkl_solver.pyx'
62-
63-
if get_option('b_coverage')
64-
# tell cython to enable linetracing
65-
add_project_arguments(['--directive', 'linetrace=true'], language : 'cython')
66-
# tell the c_compiler to definie the CYTHON_TRACE_NOGIL
67-
add_project_arguments(['-DCYTHON_TRACE_NOGIL=1'], language : 'c')
68-
69-
# compile the .c file from the .pyx file in it's directory.
70-
# These should include the default options passed to the cython compiler
71-
cython_file_full_path = meson.current_source_dir() / cython_file
72-
run_command(cython, '-M', '--fast-fail', '-3', '--directive', 'linetrace=true', cython_file_full_path)
73-
endif
64+
cython_c_args = [numpy_nodepr_api, '-DCYTHON_CCOMPLEX=0']
7465

7566
module_path = 'pydiso'
7667

7768
py.extension_module(
78-
'mkl_solver',
69+
'_mkl_solver',
7970
cython_file,
8071
c_args: cython_c_args,
8172
install: true,
@@ -84,7 +75,7 @@ py.extension_module(
8475
)
8576

8677
python_sources = [
87-
'__init__.py',
78+
'__init__.py', 'mkl_solver.py'
8879
]
8980

9081
py.install_sources(

0 commit comments

Comments
 (0)