Skip to content

Commit 6f672df

Browse files
committed
Use generic DDOT kernel for WASM128_GENERIC
1 parent 6bb0dbf commit 6f672df

2 files changed

Lines changed: 45 additions & 12 deletions

File tree

kernel/generic/dot.c

Lines changed: 44 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -47,11 +47,46 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
4747

4848
if ( (inc_x == 1) && (inc_y == 1) )
4949
{
50-
#if V_SIMD && !defined(DSDOT)
51-
const int vstep = v_nlanes_f32;
52-
const int unrollx4 = n & (-vstep * 4);
53-
const int unrollx = n & -vstep;
54-
v_f32 vsum0 = v_zero_f32();
50+
#if defined(DOUBLE) && V_SIMD && V_SIMD_F64 && !defined(DSDOT)
51+
const int vstep = v_nlanes_f64;
52+
const int unrollx4 = n & (-vstep * 4);
53+
const int unrollx = n & -vstep;
54+
v_f64 vsum0 = v_zero_f64();
55+
v_f64 vsum1 = v_zero_f64();
56+
v_f64 vsum2 = v_zero_f64();
57+
v_f64 vsum3 = v_zero_f64();
58+
while(i < unrollx4)
59+
{
60+
vsum0 = v_muladd_f64(
61+
v_loadu_f64(x + i), v_loadu_f64(y + i), vsum0
62+
);
63+
vsum1 = v_muladd_f64(
64+
v_loadu_f64(x + i + vstep), v_loadu_f64(y + i + vstep), vsum1
65+
);
66+
vsum2 = v_muladd_f64(
67+
v_loadu_f64(x + i + vstep*2), v_loadu_f64(y + i + vstep*2), vsum2
68+
);
69+
vsum3 = v_muladd_f64(
70+
v_loadu_f64(x + i + vstep*3), v_loadu_f64(y + i + vstep*3), vsum3
71+
);
72+
i += vstep*4;
73+
}
74+
vsum0 = v_add_f64(
75+
v_add_f64(vsum0, vsum1), v_add_f64(vsum2 , vsum3)
76+
);
77+
while(i < unrollx)
78+
{
79+
vsum0 = v_muladd_f64(
80+
v_loadu_f64(x + i), v_loadu_f64(y + i), vsum0
81+
);
82+
i += vstep;
83+
}
84+
dot = v_sum_f64(vsum0);
85+
#elif V_SIMD && !defined(DSDOT)
86+
const int vstep = v_nlanes_f32;
87+
const int unrollx4 = n & (-vstep * 4);
88+
const int unrollx = n & -vstep;
89+
v_f32 vsum0 = v_zero_f32();
5590
v_f32 vsum1 = v_zero_f32();
5691
v_f32 vsum2 = v_zero_f32();
5792
v_f32 vsum3 = v_zero_f32();
@@ -82,10 +117,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
82117
i += vstep;
83118
}
84119
dot = v_sum_f32(vsum0);
85-
#elif defined(DSDOT)
86-
int n1 = n & -4;
87-
for (; i < n1; i += 4)
88-
{
120+
#elif defined(DSDOT)
121+
int n1 = n & -4;
122+
for (; i < n1; i += 4)
123+
{
89124
dot += (double) y[i] * (double) x[i]
90125
+ (double) y[i+1] * (double) x[i+1]
91126
+ (double) y[i+2] * (double) x[i+2]
@@ -133,5 +168,3 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
133168
return(dot);
134169

135170
}
136-
137-

kernel/wasm/KERNEL.WASM128_GENERIC

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ CCOPYKERNEL = ../riscv64/zcopy.c
5656
ZCOPYKERNEL = ../riscv64/zcopy.c
5757

5858
SDOTKERNEL = ../generic/dot.c
59-
DDOTKERNEL = ../riscv64/dot.c
59+
DDOTKERNEL = ../generic/dot.c
6060
CDOTKERNEL = ../riscv64/zdot.c
6161
ZDOTKERNEL = ../riscv64/zdot.c
6262
DSDOTKERNEL = ../generic/dot.c

0 commit comments

Comments
 (0)