@@ -52,17 +52,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5252 bge $r0, N, .L999
5353 bge $r0, INCX, .L999
5454 li.d TEMP, 1
55+ ld.d t1, $sp, 0 // Load dummy2
5556 movgr2fr.d a1, $r0
5657 FFINT a1, a1
5758 movgr2fr.d a2, TEMP
5859 FFINT a2, a2
5960 slli.d TEMP, TEMP, BASE_SHIFT
6061 slli.d INCX, INCX, BASE_SHIFT
62+ slli.d t1, t1, BASE_SHIFT
63+ CMPEQ $fcc0, ALPHA, a1
64+ bcnez $fcc0, .L20 //ALPHA==0
6165 CMPEQ $fcc0, ALPHA, a2
6266 bcnez $fcc0, .L999 //ALPHA==1 return
63-
67+ .L1:
6468 srai.d I, N, 3
65- beq INCX, TEMP, .L30 //ALPHA!= 1 and INCX==1
69+ beq INCX, TEMP, .L30 //ALPHA != 1 (ALPHA == 0 gets here only via .L20) and INCX==1
6670 MTG TEMP, ALPHA
6771#ifdef DOUBLE
6872 xvreplgr2vr.d VALPHA, TEMP
@@ -72,7 +76,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7276 move XX, X
7377 .align 3
7478
75- .L10: //ALPHA!= 1 and INCX!=1
79+ .L10: //ALPHA != 1 (ALPHA == 0 gets here only via .L20) and INCX!=1
7680 bge $r0, I, .L32
7781 .align 3
7882.L11:
@@ -165,6 +169,75 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
165169 blt $r0, I, .L11
166170 b .L32
167171 .align 3
172+
173+ .L20: // ALPHA == 0 path (entered from the CMPEQ ALPHA, a1 test at function entry)
174+ beq t1, TEMP, .L1 // if dummy2 == 1, do not directly store 0: use the regular scaling path at .L1 (t1 and TEMP are both shifted by BASE_SHIFT)
175+ srai.d I, N, 3 // I = N >> 3: number of full 8-element groups
176+ beq INCX, TEMP, .L24 // INCX == 1 (both sides pre-shifted by BASE_SHIFT): contiguous zero fill
177+ bge $r0, I, .L22 // no full group of 8: only the strided remainder is left
178+ .align 3
179+
180+ .L21: // ALPHA == 0, INCX != 1: store a1 (the zero built from $r0 at entry) to 8 strided elements per iteration
181+ ST a1, X, 0
182+ add.d X, X, INCX // advance X by INCX (already shifted by BASE_SHIFT at entry)
183+ ST a1, X, 0
184+ add.d X, X, INCX
185+ ST a1, X, 0
186+ add.d X, X, INCX
187+ ST a1, X, 0
188+ add.d X, X, INCX
189+ ST a1, X, 0
190+ add.d X, X, INCX
191+ ST a1, X, 0
192+ add.d X, X, INCX
193+ ST a1, X, 0
194+ add.d X, X, INCX
195+ ST a1, X, 0
196+ add.d X, X, INCX
197+ addi.d I, I, -1 // one group of 8 done
198+ blt $r0, I, .L21 // repeat while I > 0, then fall through to the remainder at .L22
199+ .align 3
200+
201+ .L22: // ALPHA == 0, INCX != 1: remainder after the groups of 8
202+ andi I, N, 7 // I = N & 7: leftover element count
203+ bge $r0, I, .L999 // nothing left: leave through the common exit
204+ .align 3
205+
206+ .L23: // store a1 (zero) to one strided element per iteration
207+ ST a1, X, 0 * SIZE
208+ addi.d I, I, -1
209+ add.d X, X, INCX // advance X by INCX (already shifted by BASE_SHIFT at entry)
210+ blt $r0, I, .L23 // repeat while I > 0
211+ jirl $r0, $r1, 0 // return to caller. NOTE(review): bypasses .L999, which is not visible here - confirm .L999 does nothing this path also needs
212+ .align 3
213+
214+ .L24: // ALPHA == 0, INCX == 1: contiguous zero fill, 8 elements per iteration
215+ bge $r0, I, .L26 /* N < 8 and INCX == 1: skip the vector loop */
216+ .align 3
217+ .L25:
218+ xvxor.v VX0, VX0, VX0 // VX0 = 0. NOTE(review): loop-invariant, could be done once before .L25
219+ xvst VX0, X, 0 * SIZE // zero the elements starting at X
220+ #ifdef DOUBLE
221+ xvst VX0, X, 4 * SIZE
222+ #endif
223+ addi.d I, I, -1 // one group of 8 done
224+ addi.d X, X, 8 * SIZE // X advances by 8 elements
225+ blt $r0, I, .L25 // repeat while I > 0, then fall through to the remainder at .L26
226+ .align 3
227+
228+ .L26: // ALPHA == 0, INCX == 1: remainder after the groups of 8
229+ andi I, N, 7 // I = N & 7: leftover element count
230+ bge $r0, I, .L999 // nothing left: leave through the common exit
231+ .align 3
232+
233+ .L27: // store a1 (the zero built from $r0 at entry) to one element per iteration
234+ ST a1, X, 0 * SIZE
235+ addi.d I, I, -1
236+ addi.d X, X, SIZE // next contiguous element
237+ blt $r0, I, .L27 // repeat while I > 0
238+ jirl $r0, $r1, 0 // return to caller. NOTE(review): bypasses .L999, which is not visible here - confirm .L999 does nothing this path also needs
239+ .align 3
240+
168241.L30:
169242 bge $r0, I, .L32/*N<8 INCX==1*/
170243 MTG TEMP, ALPHA
0 commit comments