Skip to content

Commit c9185e9

Browse files
authored
Make GEMM_DIVIDE_RATE and GEMM_PREFERRED_SIZE available in DYNAMIC_ARCH builds
1 parent 0dd501d commit c9185e9

4 files changed

Lines changed: 29 additions & 17 deletions

File tree

driver/level3/gemm.c

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -59,17 +59,21 @@
5959
#define GEMM_Q 128
6060
#endif
6161

62-
#ifdef GEMM_DIVIDE_RATE
62+
#ifdef DYNAMIC_ARCH
63+
#define DIVIDE_LIMIT gotoblas->divide_limit
64+
#define DIVIDE_RATE gotoblas->divide_rate
65+
#else
66+
#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
6367
#define DIVIDE_RATE GEMM_DIVIDE_RATE
6468
#endif
6569

66-
#ifdef DYNAMIC_ARCH
67-
#define GEMM_DIVIDE_LIMIT gotoblas->divide_limit
68-
#endif
70+
//#ifdef GEMM_DIVIDE_RATE
71+
//#define DIVIDE_RATE GEMM_DIVIDE_RATE
72+
//#endif
6973

70-
#ifdef GEMM_DIVIDE_LIMIT
71-
#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
72-
#endif
74+
//#ifdef GEMM_DIVIDE_LIMIT
75+
//#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
76+
//#endif
7377

7478
#ifdef THREADED_LEVEL3
7579
#include "level3_thread.c"

driver/level3/level3_gemm3m_thread.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@
4141
#define CACHE_LINE_SIZE 8
4242
#endif
4343

44+
#define DIVIDE_RATE_MAX 2
4445
#ifndef DIVIDE_RATE
4546
#define DIVIDE_RATE 2
4647
#endif
@@ -93,7 +94,7 @@ typedef struct {
9394
#else
9495
volatile
9596
#endif
96-
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
97+
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
9798
} job_t;
9899

99100

@@ -294,7 +295,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
294295
FLOAT *a, *b, *c;
295296
job_t *job = (job_t *)args -> common;
296297
BLASLONG xxx, bufferside;
297-
FLOAT *buffer[DIVIDE_RATE];
298+
FLOAT *buffer[DIVIDE_RATE_MAX];
298299

299300
BLASLONG ls, min_l, jjs, min_jj;
300301
BLASLONG is, min_i, div_n;

driver/level3/level3_syrk_threaded.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@
4141
#define CACHE_LINE_SIZE 8
4242
#endif
4343

44+
#define DIVIDE_RATE_MAX 2
45+
4446
#ifndef DIVIDE_RATE
4547
#define DIVIDE_RATE 2
4648
#endif
@@ -69,7 +71,7 @@ _Atomic
6971
#else
7072
volatile
7173
#endif
72-
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
74+
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
7375
} job_t;
7476

7577

@@ -133,7 +135,7 @@ _Atomic
133135

134136
static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){
135137

136-
FLOAT *buffer[DIVIDE_RATE];
138+
FLOAT *buffer[DIVIDE_RATE_MAX];
137139

138140
BLASLONG k, lda, ldc;
139141
BLASLONG m_from, m_to, n_from, n_to;

driver/level3/level3_thread.c

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -41,12 +41,17 @@
4141
#define CACHE_LINE_SIZE 8
4242
#endif
4343

44+
#define DIVIDE_RATE_MAX 2
45+
4446
#ifndef DIVIDE_RATE
4547
#define DIVIDE_RATE 2
4648
#endif
4749

48-
#ifndef GEMM_PREFERED_SIZE
49-
#define GEMM_PREFERED_SIZE 1
50+
#ifdef DYNAMIC_ARCH
51+
#define GEMM_PREFERRED_SIZE gotoblas->preferred_size
52+
#endif
53+
#ifndef GEMM_PREFERRED_SIZE
54+
#define GEMM_PREFERRED_SIZE 1
5055
#endif
5156

5257
//The array of job_t may overflow the stack.
@@ -93,7 +98,7 @@
9398

9499
typedef struct {
95100
volatile
96-
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
101+
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
97102
} job_t;
98103

99104

@@ -234,7 +239,7 @@ typedef struct {
234239

235240
static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG mypos){
236241

237-
IFLOAT *buffer[DIVIDE_RATE];
242+
IFLOAT *buffer[DIVIDE_RATE_MAX];
238243

239244
BLASLONG k, lda, ldb, ldc;
240245
BLASLONG m_from, m_to, n_from, n_to;
@@ -707,7 +712,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
707712
while (m > 0){
708713
width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts);
709714

710-
width = round_up(m, width, GEMM_PREFERED_SIZE);
715+
width = round_up(m, width, GEMM_PREFERRED_SIZE);
711716

712717
m -= width;
713718

@@ -758,7 +763,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
758763
if (width < switch_ratio) {
759764
width = switch_ratio;
760765
}
761-
width = round_up(width_n, width, GEMM_PREFERED_SIZE);
766+
width = round_up(width_n, width, GEMM_PREFERRED_SIZE);
762767

763768
width_n -= width;
764769
if (width_n < 0) {

0 commit comments

Comments
 (0)