@@ -43,41 +43,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4343static void dscal_kernel_8 ( BLASLONG n , FLOAT * da , FLOAT * x )
4444{
4545
46- BLASLONG i ;
47- FLOAT alpha = * da ;
48-
49- for ( i = 0 ; i < n ; i += 8 )
50- {
51- x [0 ] *= alpha ;
52- x [1 ] *= alpha ;
53- x [2 ] *= alpha ;
54- x [3 ] *= alpha ;
55- x [4 ] *= alpha ;
56- x [5 ] *= alpha ;
57- x [6 ] *= alpha ;
58- x [7 ] *= alpha ;
59- x += 8 ;
60- }
46+ BLASLONG i ;
47+ FLOAT alpha = * da ;
48+
49+ for ( i = 0 ; i < n ; i += 8 )
50+ {
51+ x [0 ] *= alpha ;
52+ x [1 ] *= alpha ;
53+ x [2 ] *= alpha ;
54+ x [3 ] *= alpha ;
55+ x [4 ] *= alpha ;
56+ x [5 ] *= alpha ;
57+ x [6 ] *= alpha ;
58+ x [7 ] *= alpha ;
59+ x += 8 ;
60+ }
6161
6262}
6363
6464
6565static void dscal_kernel_8_zero ( BLASLONG n , FLOAT * alpha , FLOAT * x )
6666{
6767
68- BLASLONG i ;
69- for ( i = 0 ; i < n ; i += 8 )
70- {
71- x [0 ] = 0.0 ;
72- x [1 ] = 0.0 ;
73- x [2 ] = 0.0 ;
74- x [3 ] = 0.0 ;
75- x [4 ] = 0.0 ;
76- x [5 ] = 0.0 ;
77- x [6 ] = 0.0 ;
78- x [7 ] = 0.0 ;
79- x += 8 ;
80- }
68+ BLASLONG i ;
69+ for ( i = 0 ; i < n ; i += 8 )
70+ {
71+ x [0 ] = 0.0 ;
72+ x [1 ] = 0.0 ;
73+ x [2 ] = 0.0 ;
74+ x [3 ] = 0.0 ;
75+ x [4 ] = 0.0 ;
76+ x [5 ] = 0.0 ;
77+ x [6 ] = 0.0 ;
78+ x [7 ] = 0.0 ;
79+ x += 8 ;
80+ }
8181
8282}
8383
@@ -89,51 +89,51 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
8989static void dscal_kernel_inc_8 (BLASLONG n , FLOAT * alpha , FLOAT * x , BLASLONG inc_x )
9090{
9191
92- FLOAT * x1 = NULL ;
93- BLASLONG inc_x3 ;
92+ FLOAT * x1 = NULL ;
93+ BLASLONG inc_x3 ;
9494
95- inc_x <<= 3 ;
96- inc_x3 = (inc_x << 1 ) + inc_x ;
95+ inc_x <<= 3 ;
96+ inc_x3 = (inc_x << 1 ) + inc_x ;
9797
9898 __asm__ __volatile__
9999 (
100- "movddup (%3), %%xmm0 \n\t" // alpha
100+ "movddup (%3), %%xmm0 \n\t" // alpha
101101
102- "leaq (%1,%4,4), %2 \n\t"
102+ "leaq (%1,%4,4), %2 \n\t"
103103
104- ".p2align 4 \n\t"
104+ ".p2align 4 \n\t"
105105
106- "1: \n\t"
107- "movsd (%1) , %%xmm4 \n\t"
108- "movhpd (%1,%4,1), %%xmm4 \n\t"
109- "movsd (%1,%4,2), %%xmm5 \n\t"
110- "movhpd (%1,%5,1), %%xmm5 \n\t"
106+ "1: \n\t"
107+ "movsd (%1) , %%xmm4 \n\t"
108+ "movhpd (%1,%4,1), %%xmm4 \n\t"
109+ "movsd (%1,%4,2), %%xmm5 \n\t"
110+ "movhpd (%1,%5,1), %%xmm5 \n\t"
111111
112- "movsd (%2) , %%xmm6 \n\t"
113- "movhpd (%2,%4,1), %%xmm6 \n\t"
114- "movsd (%2,%4,2), %%xmm7 \n\t"
115- "movhpd (%2,%5,1), %%xmm7 \n\t"
112+ "movsd (%2) , %%xmm6 \n\t"
113+ "movhpd (%2,%4,1), %%xmm6 \n\t"
114+ "movsd (%2,%4,2), %%xmm7 \n\t"
115+ "movhpd (%2,%5,1), %%xmm7 \n\t"
116116
117- "mulpd %%xmm0, %%xmm4 \n\t"
118- "mulpd %%xmm0, %%xmm5 \n\t"
119- "mulpd %%xmm0, %%xmm6 \n\t"
120- "mulpd %%xmm0, %%xmm7 \n\t"
117+ "mulpd %%xmm0, %%xmm4 \n\t"
118+ "mulpd %%xmm0, %%xmm5 \n\t"
119+ "mulpd %%xmm0, %%xmm6 \n\t"
120+ "mulpd %%xmm0, %%xmm7 \n\t"
121121
122- "movsd %%xmm4 , (%1) \n\t"
123- "movhpd %%xmm4 , (%1,%4,1) \n\t"
124- "movsd %%xmm5 , (%1,%4,2) \n\t"
125- "movhpd %%xmm5 , (%1,%5,1) \n\t"
122+ "movsd %%xmm4 , (%1) \n\t"
123+ "movhpd %%xmm4 , (%1,%4,1) \n\t"
124+ "movsd %%xmm5 , (%1,%4,2) \n\t"
125+ "movhpd %%xmm5 , (%1,%5,1) \n\t"
126126
127- "movsd %%xmm6 , (%2) \n\t"
128- "movhpd %%xmm6 , (%2,%4,1) \n\t"
129- "movsd %%xmm7 , (%2,%4,2) \n\t"
130- "movhpd %%xmm7 , (%2,%5,1) \n\t"
127+ "movsd %%xmm6 , (%2) \n\t"
128+ "movhpd %%xmm6 , (%2,%4,1) \n\t"
129+ "movsd %%xmm7 , (%2,%4,2) \n\t"
130+ "movhpd %%xmm7 , (%2,%5,1) \n\t"
131131
132- "leaq (%1,%4,8), %1 \n\t"
133- "leaq (%2,%4,8), %2 \n\t"
132+ "leaq (%1,%4,8), %1 \n\t"
133+ "leaq (%2,%4,8), %2 \n\t"
134134
135- "subq $8, %0 \n\t"
136- "jnz 1b \n\t"
135+ "subq $8, %0 \n\t"
136+ "jnz 1b \n\t"
137137
138138 :
139139 "+r" (n ), // 0
@@ -150,106 +150,96 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
150150 "%xmm12" , "%xmm13" , "%xmm14" , "%xmm15" ,
151151 "memory"
152152 );
153-
154-
155153}
156154
157155int CNAME (BLASLONG n , BLASLONG dummy0 , BLASLONG dummy1 , FLOAT da , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y , FLOAT * dummy , BLASLONG dummy2 )
158156{
159- BLASLONG i = 0 ,j = 0 ;
160-
161- if ( inc_x != 1 )
162- {
163-
164- if ( da == 0.0 )
165- {
166-
167- BLASLONG n1 = n & -2 ;
168-
169- while (j < n1 )
170- {
171-
172- if (isinf (x [i ])|| isnan (x [i ]))
173- x [i ]= NAN ;
174- else x [i ]= 0.0 ;
175- if (isinf (x [i + inc_x ])|| isnan (x [i + inc_x ]))
176- x [i + inc_x ]= NAN ;
177- else x [i + inc_x ]= 0.0 ;
178- i += 2 * inc_x ;
179- j += 2 ;
180-
181- }
182-
183- while (j < n )
184- {
185-
186- if (isinf (x [i ])|| isnan (x [i ]))
187- x [i ]= NAN ;
188- else x [i ]= 0.0 ;
189- i += inc_x ;
190- j ++ ;
191-
192- }
193- }
194- else
195- {
196-
197- BLASLONG n1 = n & -8 ;
198- if ( n1 > 0 )
199- {
200- dscal_kernel_inc_8 (n1 , & da , x , inc_x );
201- i = n1 * inc_x ;
202- j = n1 ;
203- }
204-
205- while (j < n )
206- {
207-
208- x [i ] *= da ;
209- i += inc_x ;
210- j ++ ;
211-
212- }
213-
214- }
215-
216- return (0 );
217- }
218-
219- BLASLONG n1 = n & -8 ;
220- if ( n1 > 0 )
221- {
222- // if ( da == 0.0 )
223- // dscal_kernel_8_zero(n1 , &da , x);
224- // else
225- dscal_kernel_8 (n1 , & da , x );
226- }
227-
228- if ( da == 0.0 )
229- {
230- for ( i = n1 ; i < n ; i ++ )
231- {
232- if (isinf (x [i ])|| isnan (x [i ]))
233- x [i ]= NAN ;
234- else x [i ] = 0.0 ;
235- }
236- }
237- else if (isinf (da )){
238- for ( i = n1 ; i < n ; i ++ )
239- if (x [i ]== 0. ) x [i ]= NAN ;
240- else x [i ] *=da ;
241- }
242- else
243- {
244-
245- for ( i = n1 ; i < n ; i ++ )
246- {
247- if (isinf (x [i ]))
248- x [i ]= NAN ;
249- else x [i ] *= da ;
250- }
251- }
252- return (0 );
157+ BLASLONG i = 0 , j = 0 ;
158+
159+ // Resolved issue 4728 when the caller is dscal
160+ if (dummy2 == 1 && da == 0.0 )
161+ {
162+ if ( inc_x != 1 )
163+ {
164+ BLASLONG n1 = n & -8 ;
165+ if ( n1 > 0 )
166+ {
167+ dscal_kernel_inc_8 (n1 , & da , x , inc_x );
168+ i = n1 * inc_x ;
169+ j = n1 ;
170+ }
171+ while (j < n )
172+ {
173+ x [i ] *= da ;
174+ i += inc_x ;
175+ j ++ ;
176+ }
177+ }
178+ else
179+ {
180+ BLASLONG n1 = n & -8 ;
181+ if ( n1 > 0 )
182+ dscal_kernel_8 (n1 , & da , x );
183+ for ( i = n1 ; i < n ; i ++ )
184+ x [i ] *= da ;
185+ }
186+ }
187+ else
188+ {
189+ if ( inc_x != 1 )
190+ {
191+ if ( da == 0.0 )
192+ {
193+ BLASLONG n1 = n & -2 ;
194+ while (j < n1 )
195+ {
196+ x [i ] = 0.0 ;
197+ x [i + inc_x ] = 0.0 ;
198+ i += 2 * inc_x ;
199+ j += 2 ;
200+ }
201+ while (j < n )
202+ {
203+ x [i ] = 0.0 ;
204+ i += inc_x ;
205+ j ++ ;
206+ }
207+ }
208+ else
209+ {
210+ BLASLONG n1 = n & -8 ;
211+ if ( n1 > 0 )
212+ {
213+ dscal_kernel_inc_8 (n1 , & da , x , inc_x );
214+ i = n1 * inc_x ;
215+ j = n1 ;
216+ }
217+ while (j < n )
218+ {
219+ x [i ] *= da ;
220+ i += inc_x ;
221+ j ++ ;
222+ }
223+ }
224+ }
225+ else
226+ {
227+ if ( da == 0.0 )
228+ {
229+ BLASLONG n1 = n & -8 ;
230+ if ( n1 > 0 )
231+ dscal_kernel_8_zero (n1 , & da , x );
232+ for ( i = n1 ; i < n ; i ++ )
233+ x [i ] = 0.0 ;
234+ }
235+ else
236+ {
237+ BLASLONG n1 = n & -8 ;
238+ if ( n1 > 0 )
239+ dscal_kernel_8 (n1 , & da , x );
240+ for ( i = n1 ; i < n ; i ++ )
241+ x [i ] *= da ;
242+ }
243+ }
244+ }
253245}
254-
255-
0 commit comments