Skip to content

Commit 9f572f6

Browse files
authored
Merge pull request #192 from iyamazaki/aasen-2stage
two-stage Aasen Only REAL (S) and COMPLEX (C) Precision.... more coming soon...
2 parents 439e208 + c74272e commit 9f572f6

46 files changed

Lines changed: 7418 additions & 91 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

SRC/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ set(SLASRC
129129
ssytf2_rk.f ssytrf_rk.f ssytrs_3.f
130130
ssytri_3.f ssytri_3x.f ssycon_3.f ssysv_rk.f
131131
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f
132+
ssysv_aa_2stage.f ssytrf_aa_2stage.f ssytrs_aa_2stage.f
132133
stbcon.f
133134
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
134135
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
@@ -188,6 +189,7 @@ set(CLASRC
188189
chetf2_rk.f chetrf_rk.f chetri_3.f chetri_3x.f
189190
chetrs_3.f checon_3.f chesv_rk.f
190191
chesv_aa.f chetrf_aa.f chetrs_aa.f
192+
chesv_aa_2stage.f chetrf_aa_2stage.f chetrs_aa_2stage.f
191193
chgeqz.f chpcon.f chpev.f chpevd.f
192194
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
193195
chpsvx.f
@@ -316,6 +318,7 @@ set(DLASRC
316318
dsytf2_rk.f dsytrf_rk.f dsytrs_3.f
317319
dsytri_3.f dsytri_3x.f dsycon_3.f dsysv_rk.f
318320
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f
321+
dsysv_aa_2stage.f dsytrf_aa_2stage.f dsytrs_aa_2stage.f
319322
dtbcon.f
320323
dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
321324
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f

SRC/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ SLASRC = \
152152
ssytf2_rk.o ssytrf_rk.o ssytrs_3.o \
153153
ssytri_3.o ssytri_3x.o ssycon_3.o ssysv_rk.o \
154154
slasyf_aa.o ssysv_aa.o ssytrf_aa.o ssytrs_aa.o \
155+
ssysv_aa_2stage.o ssytrf_aa_2stage.o ssytrs_aa_2stage.o \
155156
stbcon.o \
156157
stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \
157158
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
@@ -213,6 +214,7 @@ CLASRC = \
213214
chetf2_rk.o chetrf_rk.o chetri_3.o chetri_3x.o \
214215
chetrs_3.o checon_3.o chesv_rk.o \
215216
chesv_aa.o chetrf_aa.o chetrs_aa.o clahef_aa.o \
217+
chesv_aa_2stage.o chetrf_aa_2stage.o chetrs_aa_2stage.o \
216218
chgeqz.o chpcon.o chpev.o chpevd.o \
217219
chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \
218220
chpsvx.o \
@@ -346,6 +348,7 @@ DLASRC = \
346348
dsytf2_rk.o dsytrf_rk.o dsytrs_3.o \
347349
dsytri_3.o dsytri_3x.o dsycon_3.o dsysv_rk.o \
348350
dlasyf_aa.o dsysv_aa.o dsytrf_aa.o dsytrs_aa.o \
351+
dsysv_aa_2stage.o dsytrf_aa_2stage.o dsytrs_aa_2stage.o \
349352
dtbcon.o dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \
350353
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
351354
dtptrs.o \

SRC/chesv_aa_2stage.f

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
*> \brief <b> CHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices</b>
2+
*
3+
* @precisions fortran c -> s d
4+
*
5+
* =========== DOCUMENTATION ===========
6+
*
7+
* Online html documentation available at
8+
* http://www.netlib.org/lapack/explore-html/
9+
*
10+
*> \htmlonly
11+
*> Download CHESV_AA_2STAGE + dependencies
12+
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chesv_aa_2stage.f">
13+
*> [TGZ]</a>
14+
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chesv_aa_2stage.f">
15+
*> [ZIP]</a>
16+
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chesv_aa_2stage.f">
17+
*> [TXT]</a>
18+
*> \endhtmlonly
19+
*
20+
* Definition:
21+
* ===========
22+
*
23+
* SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB,
24+
* IPIV, IPIV2, B, LDB, WORK, LWORK,
25+
* INFO )
26+
*
27+
* .. Scalar Arguments ..
28+
* CHARACTER UPLO
29+
* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO
30+
* ..
31+
* .. Array Arguments ..
32+
* INTEGER IPIV( * ), IPIV2( * )
33+
* COMPLEX A( LDA, * ), TB( * ), B( LDB, *), WORK( * )
34+
* ..
35+
*
36+
*> \par Purpose:
37+
* =============
38+
*>
39+
*> \verbatim
40+
*>
41+
*> CHESV_AA_2STAGE computes the solution to a complex system of
42+
*> linear equations
43+
*> A * X = B,
44+
*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS
45+
*> matrices.
46+
*>
47+
*> Aasen's 2-stage algorithm is used to factor A as
48+
*> A = U * T * U**H, if UPLO = 'U', or
49+
*> A = L * T * L**H, if UPLO = 'L',
50+
*> where U (or L) is a product of permutation and unit upper (lower)
51+
*> triangular matrices, and T is Hermitian and band. The matrix T is
52+
*> then LU-factored with partial pivoting. The factored form of A
53+
*> is then used to solve the system of equations A * X = B.
54+
*>
55+
*> This is the blocked version of the algorithm, calling Level 3 BLAS.
56+
*> \endverbatim
57+
*
58+
* Arguments:
59+
* ==========
60+
*
61+
*> \param[in] UPLO
62+
*> \verbatim
63+
*> UPLO is CHARACTER*1
64+
*> = 'U': Upper triangle of A is stored;
65+
*> = 'L': Lower triangle of A is stored.
66+
*> \endverbatim
67+
*>
68+
*> \param[in] N
69+
*> \verbatim
70+
*> N is INTEGER
71+
*> The order of the matrix A. N >= 0.
72+
*> \endverbatim
73+
*>
74+
*> \param[in] NRHS
75+
*> \verbatim
76+
*> NRHS is INTEGER
77+
*> The number of right hand sides, i.e., the number of columns
78+
*> of the matrix B. NRHS >= 0.
79+
*> \endverbatim
80+
*>
81+
*> \param[in,out] A
82+
*> \verbatim
83+
*> A is COMPLEX array, dimension (LDA,N)
84+
*> On entry, the hermitian matrix A. If UPLO = 'U', the leading
85+
*> N-by-N upper triangular part of A contains the upper
86+
*> triangular part of the matrix A, and the strictly lower
87+
*> triangular part of A is not referenced. If UPLO = 'L', the
88+
*> leading N-by-N lower triangular part of A contains the lower
89+
*> triangular part of the matrix A, and the strictly upper
90+
*> triangular part of A is not referenced.
91+
*>
92+
*> On exit, L is stored below (or above) the subdiaonal blocks,
93+
*> when UPLO is 'L' (or 'U').
94+
*> \endverbatim
95+
*>
96+
*> \param[in] LDA
97+
*> \verbatim
98+
*> LDA is INTEGER
99+
*> The leading dimension of the array A. LDA >= max(1,N).
100+
*> \endverbatim
101+
*>
102+
*> \param[out] TB
103+
*> \verbatim
104+
*> TB is COMPLEX array, dimension (LTB)
105+
*> On exit, details of the LU factorization of the band matrix.
106+
*> \endverbatim
107+
*>
108+
*> \param[in] LTB
109+
*> \verbatim
110+
*> The size of the array TB. LTB >= 4*N, internally
111+
*> used to select NB such that LTB >= (3*NB+1)*N.
112+
*>
113+
*> If LTB = -1, then a workspace query is assumed; the
114+
*> routine only calculates the optimal size of LTB,
115+
*> returns this value as the first entry of TB, and
116+
*> no error message related to LTB is issued by XERBLA.
117+
*> \endverbatim
118+
*>
119+
*> \param[out] IPIV
120+
*> \verbatim
121+
*> IPIV is INTEGER array, dimension (N)
122+
*> On exit, it contains the details of the interchanges, i.e.,
123+
*> the row and column k of A were interchanged with the
124+
*> row and column IPIV(k).
125+
*> \endverbatim
126+
*>
127+
*> \param[out] IPIV2
128+
*> \verbatim
129+
*> IPIV is INTEGER array, dimension (N)
130+
*> On exit, it contains the details of the interchanges, i.e.,
131+
*> the row and column k of T were interchanged with the
132+
*> row and column IPIV(k).
133+
*> \endverbatim
134+
*>
135+
*> \param[in,out] B
136+
*> \verbatim
137+
*> B is COMPLEX array, dimension (LDB,NRHS)
138+
*> On entry, the right hand side matrix B.
139+
*> On exit, the solution matrix X.
140+
*> \endverbatim
141+
*>
142+
*> \param[in] LDB
143+
*> \verbatim
144+
*> LDB is INTEGER
145+
*> The leading dimension of the array B. LDB >= max(1,N).
146+
*> \endverbatim
147+
*>
148+
*> \param[out] WORK
149+
*> \verbatim
150+
*> WORK is COMPLEX workspace of size LWORK
151+
*> \endverbatim
152+
*>
153+
*> \param[in] LWORK
154+
*> \verbatim
155+
*> The size of WORK. LWORK >= N, internally used to select NB
156+
*> such that LWORK >= N*NB.
157+
*>
158+
*> If LWORK = -1, then a workspace query is assumed; the
159+
*> routine only calculates the optimal size of the WORK array,
160+
*> returns this value as the first entry of the WORK array, and
161+
*> no error message related to LWORK is issued by XERBLA.
162+
*> \endverbatim
163+
*>
164+
*> \param[out] INFO
165+
*> \verbatim
166+
*> INFO is INTEGER
167+
*> = 0: successful exit
168+
*> < 0: if INFO = -i, the i-th argument had an illegal value.
169+
*> > 0: if INFO = i, band LU factorization failed on i-th column
170+
*> \endverbatim
171+
*
172+
* Authors:
173+
* ========
174+
*
175+
*> \author Univ. of Tennessee
176+
*> \author Univ. of California Berkeley
177+
*> \author Univ. of Colorado Denver
178+
*> \author NAG Ltd.
179+
*
180+
*> \date December 2016
181+
*
182+
*> \ingroup complexSYcomputational
183+
*
184+
* =====================================================================
185+
SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB,
186+
$ IPIV, IPIV2, B, LDB, WORK, LWORK,
187+
$ INFO )
188+
*
189+
* -- LAPACK computational routine (version 3.7.0) --
190+
* -- LAPACK is a software package provided by Univ. of Tennessee, --
191+
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
192+
* December 2016
193+
*
194+
IMPLICIT NONE
195+
*
196+
* .. Scalar Arguments ..
197+
CHARACTER UPLO
198+
INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO
199+
* ..
200+
* .. Array Arguments ..
201+
INTEGER IPIV( * ), IPIV2( * )
202+
COMPLEX A( LDA, * ), B( LDB, * ), TB( * ), WORK( * )
203+
* ..
204+
*
205+
* =====================================================================
206+
* .. Parameters ..
207+
COMPLEX ZERO, ONE
208+
PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ),
209+
$ ONE = ( 1.0E+0, 0.0E+0 ) )
210+
*
211+
* .. Local Scalars ..
212+
LOGICAL UPPER, TQUERY, WQUERY
213+
INTEGER I, J, K, I1, I2, TD
214+
INTEGER LDTB, LWKOPT, NB, KB, NT, IINFO
215+
COMPLEX PIV
216+
* ..
217+
* .. External Functions ..
218+
LOGICAL LSAME
219+
INTEGER ILAENV
220+
EXTERNAL LSAME, ILAENV
221+
* ..
222+
* .. External Subroutines ..
223+
EXTERNAL XERBLA
224+
* ..
225+
* .. Intrinsic Functions ..
226+
INTRINSIC CONJG, MIN, MAX
227+
* ..
228+
* .. Executable Statements ..
229+
*
230+
* Test the input parameters.
231+
*
232+
INFO = 0
233+
UPPER = LSAME( UPLO, 'U' )
234+
WQUERY = ( LWORK.EQ.-1 )
235+
TQUERY = ( LTB.EQ.-1 )
236+
IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
237+
INFO = -1
238+
ELSE IF( N.LT.0 ) THEN
239+
INFO = -2
240+
ELSE IF( NRHS.LT.0 ) THEN
241+
INFO = -3
242+
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
243+
INFO = -5
244+
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
245+
INFO = -11
246+
END IF
247+
*
248+
IF( INFO.EQ.0 ) THEN
249+
CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV,
250+
$ IPIV2, WORK, -1, INFO )
251+
LWKOPT = INT( WORK(1) )
252+
IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN
253+
INFO = -7
254+
ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN
255+
INFO = -13
256+
END IF
257+
END IF
258+
*
259+
IF( INFO.NE.0 ) THEN
260+
CALL XERBLA( 'CHESV_AA_2STAGE', -INFO )
261+
RETURN
262+
ELSE IF( WQUERY .OR. TQUERY ) THEN
263+
RETURN
264+
END IF
265+
*
266+
*
267+
* Compute the factorization A = U*T*U**H or A = L*T*L**H.
268+
*
269+
CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2,
270+
$ WORK, LWORK, INFO )
271+
IF( INFO.EQ.0 ) THEN
272+
*
273+
* Solve the system A*X = B, overwriting B with X.
274+
*
275+
CALL CHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV,
276+
$ IPIV2, B, LDB, INFO )
277+
*
278+
END IF
279+
*
280+
WORK( 1 ) = LWKOPT
281+
*
282+
* End of CHESV_AA_2STAGE
283+
*
284+
END

SRC/chetrf_aa.f

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
174174
*
175175
* Determine the block size
176176
*
177-
NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
177+
NB = ILAENV( 1, 'CHETRF_AA', UPLO, N, -1, -1, -1 )
178178
*
179179
* Test the input parameters.
180180
*

0 commit comments

Comments
 (0)