Skip to content

Commit a89142f

Browse files
authored
Merge pull request #5688 from martin-frbg/divlimit_dyn
Make PREFERRED_SIZE, GEMM_DIVIDE_LIMIT and _RATE available to DYNAMIC_ARCH builds
2 parents afcf70d + c9185e9 commit a89142f

7 files changed

Lines changed: 67 additions & 30 deletions

File tree

common_param.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
typedef struct {
4848
int dtb_entries;
4949
int switch_ratio;
50+
int divide_rate;
51+
int divide_limit;
52+
int preferred_size;
5053
int offsetA, offsetB, align;
5154
#if BUILD_HFLOAT16 == 1
5255
int shgemm_p, shgemm_q, shgemm_r;

driver/level3/gemm.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,21 @@
5959
#define GEMM_Q 128
6060
#endif
6161

62-
#ifdef GEMM_DIVIDE_RATE
62+
#ifdef DYNAMIC_ARCH
63+
#define DIVIDE_LIMIT gotoblas->divide_limit
64+
#define DIVIDE_RATE gotoblas->divide_rate
65+
#else
66+
#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
6367
#define DIVIDE_RATE GEMM_DIVIDE_RATE
6468
#endif
6569

66-
#ifdef GEMM_DIVIDE_LIMIT
67-
#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
68-
#endif
70+
//#ifdef GEMM_DIVIDE_RATE
71+
//#define DIVIDE_RATE GEMM_DIVIDE_RATE
72+
//#endif
73+
74+
//#ifdef GEMM_DIVIDE_LIMIT
75+
//#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
76+
//#endif
6977

7078
#ifdef THREADED_LEVEL3
7179
#include "level3_thread.c"

driver/level3/level3_gemm3m_thread.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#define CACHE_LINE_SIZE 8
4242
#endif
4343

44+
#define DIVIDE_RATE_MAX 2
4445
#ifndef DIVIDE_RATE
4546
#define DIVIDE_RATE 2
4647
#endif
@@ -93,7 +94,7 @@ typedef struct {
9394
#else
9495
volatile
9596
#endif
96-
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
97+
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
9798
} job_t;
9899

99100

@@ -294,7 +295,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
294295
FLOAT *a, *b, *c;
295296
job_t *job = (job_t *)args -> common;
296297
BLASLONG xxx, bufferside;
297-
FLOAT *buffer[DIVIDE_RATE];
298+
FLOAT *buffer[DIVIDE_RATE_MAX];
298299

299300
BLASLONG ls, min_l, jjs, min_jj;
300301
BLASLONG is, min_i, div_n;

driver/level3/level3_syrk_threaded.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
#define CACHE_LINE_SIZE 8
4242
#endif
4343

44+
#define DIVIDE_RATE_MAX 2
45+
4446
#ifndef DIVIDE_RATE
4547
#define DIVIDE_RATE 2
4648
#endif
@@ -69,7 +71,7 @@ _Atomic
6971
#else
7072
volatile
7173
#endif
72-
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
74+
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
7375
} job_t;
7476

7577

@@ -133,7 +135,7 @@ _Atomic
133135

134136
static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){
135137

136-
FLOAT *buffer[DIVIDE_RATE];
138+
FLOAT *buffer[DIVIDE_RATE_MAX];
137139

138140
BLASLONG k, lda, ldc;
139141
BLASLONG m_from, m_to, n_from, n_to;

driver/level3/level3_thread.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,17 @@
4141
#define CACHE_LINE_SIZE 8
4242
#endif
4343

44+
#define DIVIDE_RATE_MAX 2
45+
4446
#ifndef DIVIDE_RATE
4547
#define DIVIDE_RATE 2
4648
#endif
4749

48-
#ifndef GEMM_PREFERED_SIZE
49-
#define GEMM_PREFERED_SIZE 1
50+
#ifdef DYNAMIC_ARCH
51+
#define GEMM_PREFERRED_SIZE gotoblas->preferred_size
52+
#endif
53+
#ifndef GEMM_PREFERRED_SIZE
54+
#define GEMM_PREFERRED_SIZE 1
5055
#endif
5156

5257
//The array of job_t may overflow the stack.
@@ -93,7 +98,7 @@
9398

9499
typedef struct {
95100
volatile
96-
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
101+
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
97102
} job_t;
98103

99104

@@ -234,7 +239,7 @@ typedef struct {
234239

235240
static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG mypos){
236241

237-
IFLOAT *buffer[DIVIDE_RATE];
242+
IFLOAT *buffer[DIVIDE_RATE_MAX];
238243

239244
BLASLONG k, lda, ldb, ldc;
240245
BLASLONG m_from, m_to, n_from, n_to;
@@ -707,7 +712,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
707712
while (m > 0){
708713
width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts);
709714

710-
width = round_up(m, width, GEMM_PREFERED_SIZE);
715+
width = round_up(m, width, GEMM_PREFERRED_SIZE);
711716

712717
m -= width;
713718

@@ -758,7 +763,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
758763
if (width < switch_ratio) {
759764
width = switch_ratio;
760765
}
761-
width = round_up(width_n, width, GEMM_PREFERED_SIZE);
766+
width = round_up(width_n, width, GEMM_PREFERRED_SIZE);
762767

763768
width_n -= width;
764769
if (width_n < 0) {

kernel/setparam-ref.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ gotoblas_t TABLE_NAME = {
5454

5555
SWITCH_RATIO,
5656

57+
GEMM_DIVIDE_RATE,
58+
59+
GEMM_DIVIDE_LIMIT,
60+
61+
GEMM_PREFERRED_SIZE,
62+
5763
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
5864

5965
#ifdef BUILD_HFLOAT16

param.h

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -630,10 +630,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
630630

631631
#if defined(XDOUBLE) || defined(DOUBLE)
632632
#define SWITCH_RATIO 4
633-
#define GEMM_PREFERED_SIZE 4
633+
#define GEMM_PREFERRED_SIZE 4
634634
#else
635635
#define SWITCH_RATIO 8
636-
#define GEMM_PREFERED_SIZE 8
636+
#define GEMM_PREFERRED_SIZE 8
637637
#endif
638638

639639
#ifdef ARCH_X86
@@ -1539,10 +1539,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15391539

15401540
#if defined(XDOUBLE) || defined(DOUBLE)
15411541
#define SWITCH_RATIO 4
1542-
#define GEMM_PREFERED_SIZE 4
1542+
#define GEMM_PREFERRED_SIZE 4
15431543
#else
15441544
#define SWITCH_RATIO 8
1545-
#define GEMM_PREFERED_SIZE 8
1545+
#define GEMM_PREFERRED_SIZE 8
15461546
#endif
15471547

15481548
#ifdef ARCH_X86
@@ -1665,10 +1665,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16651665

16661666
#if defined(XDOUBLE) || defined(DOUBLE)
16671667
#define SWITCH_RATIO 8
1668-
#define GEMM_PREFERED_SIZE 8
1668+
#define GEMM_PREFERRED_SIZE 8
16691669
#else
16701670
#define SWITCH_RATIO 16
1671-
#define GEMM_PREFERED_SIZE 16
1671+
#define GEMM_PREFERRED_SIZE 16
16721672
#endif
16731673
#define USE_SGEMM_KERNEL_DIRECT 1
16741674

@@ -1786,10 +1786,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17861786

17871787
#if defined(XDOUBLE) || defined(DOUBLE)
17881788
#define SWITCH_RATIO 8
1789-
#define GEMM_PREFERED_SIZE 8
1789+
#define GEMM_PREFERRED_SIZE 8
17901790
#else
17911791
#define SWITCH_RATIO 16
1792-
#define GEMM_PREFERED_SIZE 16
1792+
#define GEMM_PREFERRED_SIZE 16
17931793
#endif
17941794
#define USE_SGEMM_KERNEL_DIRECT 1
17951795

@@ -1919,10 +1919,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19191919

19201920
#if defined(XDOUBLE) || defined(DOUBLE)
19211921
#define SWITCH_RATIO 8
1922-
#define GEMM_PREFERED_SIZE 8
1922+
#define GEMM_PREFERRED_SIZE 8
19231923
#else
19241924
#define SWITCH_RATIO 16
1925-
#define GEMM_PREFERED_SIZE 16
1925+
#define GEMM_PREFERRED_SIZE 16
19261926
#endif
19271927
#define USE_SGEMM_KERNEL_DIRECT 1
19281928

@@ -2577,7 +2577,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25772577
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
25782578

25792579
#define SWITCH_RATIO 16
2580-
#define GEMM_PREFERED_SIZE 16
2580+
#define GEMM_PREFERRED_SIZE 16
25812581

25822582
#define SGEMM_DEFAULT_UNROLL_M 16
25832583
#define SGEMM_DEFAULT_UNROLL_N 8
@@ -2616,7 +2616,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26162616
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
26172617

26182618
#define SWITCH_RATIO 16
2619-
#define GEMM_PREFERED_SIZE 16
2619+
#define GEMM_PREFERRED_SIZE 16
26202620

26212621
#define SGEMM_DEFAULT_UNROLL_M 16
26222622
#define SGEMM_DEFAULT_UNROLL_N 8
@@ -3611,10 +3611,10 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
36113611

36123612
#if defined(XDOUBLE) || defined(DOUBLE)
36133613
#define SWITCH_RATIO 8
3614-
#define GEMM_PREFERED_SIZE 4
3614+
#define GEMM_PREFERRED_SIZE 4
36153615
#else
36163616
#define SWITCH_RATIO 16
3617-
#define GEMM_PREFERED_SIZE 8
3617+
#define GEMM_PREFERRED_SIZE 8
36183618
#endif
36193619

36203620
#undef BGEMM_ALIGN_K
@@ -3749,9 +3749,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
37493749
#define GEMM_DIVIDE_RATE 1
37503750

37513751
#if defined(XDOUBLE) || defined(DOUBLE)
3752-
#define GEMM_PREFERED_SIZE 8
3752+
#define GEMM_PREFERRED_SIZE 8
37533753
#else
3754-
#define GEMM_PREFERED_SIZE 16
3754+
#define GEMM_PREFERRED_SIZE 16
37553755
#endif
37563756

37573757
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
@@ -4258,6 +4258,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
42584258
#define SWITCH_RATIO 2
42594259
#endif
42604260

4261+
#ifndef GEMM_DIVIDE_RATE
4262+
#define GEMM_DIVIDE_RATE 2
4263+
#endif
4264+
4265+
#ifndef GEMM_DIVIDE_LIMIT
4266+
#define GEMM_DIVIDE_LIMIT 0
4267+
#endif
4268+
4269+
#ifndef GEMM_PREFERRED_SIZE
4270+
#define GEMM_PREFERRED_SIZE 1
4271+
#endif
4272+
42614273
#ifndef QGEMM_DEFAULT_UNROLL_M
42624274
#define QGEMM_DEFAULT_UNROLL_M 2
42634275
#endif

0 commit comments

Comments
 (0)