Skip to content

Commit 88ee750

Browse files
authored
Merge pull request #203 from iyamazaki/aasen-2stage
Aasen 2stage
2 parents d15c92b + 9eb4287 commit 88ee750

6 files changed

Lines changed: 179 additions & 155 deletions

File tree

SRC/chetrf_aa_2stage.f

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ SUBROUTINE CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
183183
* .. Local Scalars ..
184184
LOGICAL UPPER, TQUERY, WQUERY
185185
INTEGER I, J, K, I1, I2, TD
186-
INTEGER LDTB, NB, KB, NT, IINFO
186+
INTEGER LDTB, NB, KB, JB, NT, IINFO
187187
COMPLEX PIV
188188
* ..
189189
* .. External Functions ..
@@ -286,23 +286,25 @@ SUBROUTINE CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
286286
DO I = 1, J-1
287287
IF( I.EQ.1 ) THEN
288288
* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J)
289-
CALL CGEMM( 'NoTranspose', 'NoTranspose',
290-
$ NB, KB, 2*NB,
291-
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
292-
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
293-
$ ZERO, WORK( I*NB+1 ), N )
294-
ELSE IF( I .EQ. J-1) THEN
295-
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
289+
IF( I .EQ. (J-1) ) THEN
290+
JB = NB+KB
291+
ELSE
292+
JB = 2*NB
293+
END IF
296294
CALL CGEMM( 'NoTranspose', 'NoTranspose',
297-
$ NB, KB, 2*NB+KB,
298-
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
299-
$ LDTB-1,
300-
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
295+
$ NB, KB, JB,
296+
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
297+
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
301298
$ ZERO, WORK( I*NB+1 ), N )
302299
ELSE
303300
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
301+
IF( I .EQ. (J-1) ) THEN
302+
JB = 2*NB+KB
303+
ELSE
304+
JB = 3*NB
305+
END IF
304306
CALL CGEMM( 'NoTranspose', 'NoTranspose',
305-
$ NB, KB, 3*NB,
307+
$ NB, KB, JB,
306308
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
307309
$ LDTB-1,
308310
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
@@ -484,23 +486,25 @@ SUBROUTINE CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
484486
DO I = 1, J-1
485487
IF( I.EQ.1 ) THEN
486488
* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)'
487-
CALL CGEMM( 'NoTranspose', 'Conjugate transpose',
488-
$ NB, KB, 2*NB,
489-
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
490-
$ A( J*NB+1, (I-1)*NB+1 ), LDA,
491-
$ ZERO, WORK( I*NB+1 ), N )
492-
ELSE IF( I .EQ. J-1) THEN
493-
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
489+
IF( I .EQ. (J-1) ) THEN
490+
JB = NB+KB
491+
ELSE
492+
JB = 2*NB
493+
END IF
494494
CALL CGEMM( 'NoTranspose', 'Conjugate transpose',
495-
$ NB, KB, 2*NB+KB,
496-
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
497-
$ LDTB-1,
498-
$ A( J*NB+1, (I-2)*NB+1 ), LDA,
495+
$ NB, KB, JB,
496+
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
497+
$ A( J*NB+1, (I-1)*NB+1 ), LDA,
499498
$ ZERO, WORK( I*NB+1 ), N )
500499
ELSE
501500
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
501+
IF( I .EQ. (J-1) ) THEN
502+
JB = 2*NB+KB
503+
ELSE
504+
JB = 3*NB
505+
END IF
502506
CALL CGEMM( 'NoTranspose', 'Conjugate transpose',
503-
$ NB, KB, 3*NB,
507+
$ NB, KB, JB,
504508
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
505509
$ LDTB-1,
506510
$ A( J*NB+1, (I-2)*NB+1 ), LDA,

SRC/csytrf_aa_2stage.f

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
183183
* .. Local Scalars ..
184184
LOGICAL UPPER, TQUERY, WQUERY
185185
INTEGER I, J, K, I1, I2, TD
186-
INTEGER LDTB, NB, KB, NT, IINFO
186+
INTEGER LDTB, NB, KB, JB, NT, IINFO
187187
COMPLEX PIV
188188
* ..
189189
* .. External Functions ..
@@ -284,23 +284,25 @@ SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
284284
DO I = 1, J-1
285285
IF( I.EQ.1 ) THEN
286286
* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J)
287-
CALL CGEMM( 'NoTranspose', 'NoTranspose',
288-
$ NB, KB, 2*NB,
289-
$ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
290-
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
291-
$ CZERO, WORK( I*NB+1 ), N )
292-
ELSE IF( I .EQ. J-1) THEN
293-
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
287+
IF( I .EQ. (J-1) ) THEN
288+
JB = NB+KB
289+
ELSE
290+
JB = 2*NB
291+
END IF
294292
CALL CGEMM( 'NoTranspose', 'NoTranspose',
295-
$ NB, KB, 2*NB+KB,
296-
$ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
297-
$ LDTB-1,
298-
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
293+
$ NB, KB, JB,
294+
$ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
295+
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
299296
$ CZERO, WORK( I*NB+1 ), N )
300297
ELSE
301298
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
299+
IF( I .EQ. J-1) THEN
300+
JB = 2*NB+KB
301+
ELSE
302+
JB = 3*NB
303+
END IF
302304
CALL CGEMM( 'NoTranspose', 'NoTranspose',
303-
$ NB, KB, 3*NB,
305+
$ NB, KB, JB,
304306
$ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
305307
$ LDTB-1,
306308
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
@@ -477,23 +479,25 @@ SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
477479
DO I = 1, J-1
478480
IF( I.EQ.1 ) THEN
479481
* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)'
480-
CALL CGEMM( 'NoTranspose', 'Transpose',
481-
$ NB, KB, 2*NB,
482-
$ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
483-
$ A( J*NB+1, (I-1)*NB+1 ), LDA,
484-
$ CZERO, WORK( I*NB+1 ), N )
485-
ELSE IF( I .EQ. J-1) THEN
486-
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
482+
IF( I .EQ. (J-1) ) THEN
483+
JB = NB+KB
484+
ELSE
485+
JB = 2*NB
486+
END IF
487487
CALL CGEMM( 'NoTranspose', 'Transpose',
488-
$ NB, KB, 2*NB+KB,
489-
$ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
490-
$ LDTB-1,
491-
$ A( J*NB+1, (I-2)*NB+1 ), LDA,
488+
$ NB, KB, JB,
489+
$ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
490+
$ A( J*NB+1, (I-1)*NB+1 ), LDA,
492491
$ CZERO, WORK( I*NB+1 ), N )
493492
ELSE
494493
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
494+
IF( I .EQ. (J-1) ) THEN
495+
JB = 2*NB+KB
496+
ELSE
497+
JB = 3*NB
498+
END IF
495499
CALL CGEMM( 'NoTranspose', 'Transpose',
496-
$ NB, KB, 3*NB,
500+
$ NB, KB, JB,
497501
$ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
498502
$ LDTB-1,
499503
$ A( J*NB+1, (I-2)*NB+1 ), LDA,

SRC/dsytrf_aa_2stage.f

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
182182
* .. Local Scalars ..
183183
LOGICAL UPPER, TQUERY, WQUERY
184184
INTEGER I, J, K, I1, I2, TD
185-
INTEGER LDTB, NB, KB, NT, IINFO
185+
INTEGER LDTB, NB, KB, JB, NT, IINFO
186186
DOUBLE PRECISION PIV
187187
* ..
188188
* .. External Functions ..
@@ -282,25 +282,27 @@ SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
282282
*
283283
KB = MIN(NB, N-J*NB)
284284
DO I = 1, J-1
285-
IF( I.EQ.1 ) THEN
286-
* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J)
287-
CALL DGEMM( 'NoTranspose', 'NoTranspose',
288-
$ NB, KB, 2*NB,
289-
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
290-
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
291-
$ ZERO, WORK( I*NB+1 ), N )
292-
ELSE IF( I .EQ. J-1) THEN
293-
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
285+
IF( I .EQ. 1 ) THEN
286+
* H(I,J) = T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
287+
IF( I .EQ. (J-1) ) THEN
288+
JB = NB+KB
289+
ELSE
290+
JB = 2*NB
291+
END IF
294292
CALL DGEMM( 'NoTranspose', 'NoTranspose',
295-
$ NB, KB, 2*NB+KB,
296-
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
297-
$ LDTB-1,
298-
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
293+
$ NB, KB, JB,
294+
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
295+
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
299296
$ ZERO, WORK( I*NB+1 ), N )
300-
ELSE
297+
ELSE
301298
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
299+
IF( I .EQ. J-1) THEN
300+
JB = 2*NB+KB
301+
ELSE
302+
JB = 3*NB
303+
END IF
302304
CALL DGEMM( 'NoTranspose', 'NoTranspose',
303-
$ NB, KB, 3*NB,
305+
$ NB, KB, JB,
304306
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
305307
$ LDTB-1,
306308
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
@@ -471,23 +473,25 @@ SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
471473
DO I = 1, J-1
472474
IF( I.EQ.1 ) THEN
473475
* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)'
476+
IF( I .EQ. J-1) THEN
477+
JB = NB+KB
478+
ELSE
479+
JB = 2*NB
480+
END IF
474481
CALL DGEMM( 'NoTranspose', 'Transpose',
475-
$ NB, KB, 2*NB,
482+
$ NB, KB, JB,
476483
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
477484
$ A( J*NB+1, (I-1)*NB+1 ), LDA,
478485
$ ZERO, WORK( I*NB+1 ), N )
479-
ELSE IF( I .EQ. J-1) THEN
480-
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
481-
CALL DGEMM( 'NoTranspose', 'Transpose',
482-
$ NB, KB, 2*NB+KB,
483-
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
484-
$ LDTB-1,
485-
$ A( J*NB+1, (I-2)*NB+1 ), LDA,
486-
$ ZERO, WORK( I*NB+1 ), N )
487-
ELSE
486+
ELSE
488487
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
488+
IF( I .EQ. J-1) THEN
489+
JB = 2*NB+KB
490+
ELSE
491+
JB = 3*NB
492+
END IF
489493
CALL DGEMM( 'NoTranspose', 'Transpose',
490-
$ NB, KB, 3*NB,
494+
$ NB, KB, JB,
491495
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
492496
$ LDTB-1,
493497
$ A( J*NB+1, (I-2)*NB+1 ), LDA,
@@ -590,8 +594,8 @@ SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
590594
*
591595
DO K = 1, NB
592596
DO I = 1, KB
593-
TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) =
594-
$ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB )
597+
TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB )
598+
$ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB )
595599
END DO
596600
END DO
597601
CALL DLASET( 'Upper', KB, NB, ZERO, ONE,

SRC/ssytrf_aa_2stage.f

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
182182
* .. Local Scalars ..
183183
LOGICAL UPPER, TQUERY, WQUERY
184184
INTEGER I, J, K, I1, I2, TD
185-
INTEGER LDTB, NB, KB, NT, IINFO
185+
INTEGER LDTB, NB, KB, JB, NT, IINFO
186186
REAL PIV
187187
* ..
188188
* .. External Functions ..
@@ -283,24 +283,26 @@ SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
283283
KB = MIN(NB, N-J*NB)
284284
DO I = 1, J-1
285285
IF( I.EQ.1 ) THEN
286-
* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J)
287-
CALL SGEMM( 'NoTranspose', 'NoTranspose',
288-
$ NB, KB, 2*NB,
289-
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
290-
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
291-
$ ZERO, WORK( I*NB+1 ), N )
292-
ELSE IF( I .EQ. J-1) THEN
293-
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
286+
* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J)
287+
IF( I .EQ. (J-1) ) THEN
288+
JB = NB+KB
289+
ELSE
290+
JB = 2*NB
291+
END IF
294292
CALL SGEMM( 'NoTranspose', 'NoTranspose',
295-
$ NB, KB, 2*NB+KB,
296-
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
297-
$ LDTB-1,
298-
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
293+
$ NB, KB, JB,
294+
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
295+
$ A( (I-1)*NB+1, J*NB+1 ), LDA,
299296
$ ZERO, WORK( I*NB+1 ), N )
300297
ELSE
301298
* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
299+
IF( I .EQ. J-1) THEN
300+
JB = 2*NB+KB
301+
ELSE
302+
JB = 3*NB
303+
END IF
302304
CALL SGEMM( 'NoTranspose', 'NoTranspose',
303-
$ NB, KB, 3*NB,
305+
$ NB, KB, JB,
304306
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
305307
$ LDTB-1,
306308
$ A( (I-2)*NB+1, J*NB+1 ), LDA,
@@ -471,23 +473,25 @@ SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
471473
DO I = 1, J-1
472474
IF( I.EQ.1 ) THEN
473475
* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)'
474-
CALL SGEMM( 'NoTranspose', 'Transpose',
475-
$ NB, KB, 2*NB,
476-
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
477-
$ A( J*NB+1, (I-1)*NB+1 ), LDA,
478-
$ ZERO, WORK( I*NB+1 ), N )
479-
ELSE IF( I .EQ. J-1) THEN
480-
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
476+
IF( I .EQ. (J-1) ) THEN
477+
JB = NB+KB
478+
ELSE
479+
JB = 2*NB
480+
END IF
481481
CALL SGEMM( 'NoTranspose', 'Transpose',
482-
$ NB, KB, 2*NB+KB,
483-
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
484-
$ LDTB-1,
485-
$ A( J*NB+1, (I-2)*NB+1 ), LDA,
482+
$ NB, KB, JB,
483+
$ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
484+
$ A( J*NB+1, (I-1)*NB+1 ), LDA,
486485
$ ZERO, WORK( I*NB+1 ), N )
487486
ELSE
488487
* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
488+
IF( I .EQ. J-1) THEN
489+
JB = 2*NB+KB
490+
ELSE
491+
JB = 3*NB
492+
END IF
489493
CALL SGEMM( 'NoTranspose', 'Transpose',
490-
$ NB, KB, 3*NB,
494+
$ NB, KB, JB,
491495
$ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
492496
$ LDTB-1,
493497
$ A( J*NB+1, (I-2)*NB+1 ), LDA,

0 commit comments

Comments
 (0)