Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 69db986

Browse files
1e-to authored and shssf committed
Fix series perf tests in new style (#339)
* Fix series perf tests in new style
* Small fixes
* Small fixes
1 parent 47f94fe commit 69db986

2 files changed

Lines changed: 77 additions & 50 deletions

File tree

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3106,10 +3106,10 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No
31063106
sort_nona = numpy.argsort(self._data[~na_data_arr])
31073107
q = 0
31083108
for id, i in enumerate(sort):
3109-
if id not in list(sort[len(self._data) - na:]):
3110-
result[id] = sort_nona[id-q]
3111-
else:
3109+
if id in set(sort[len(self._data) - na:]):
31123110
q += 1
3111+
else:
3112+
result[id] = sort_nona[id - q]
31133113
for i in sort[len(self._data) - na:]:
31143114
result[i] = -1
31153115

@@ -3133,10 +3133,10 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order=
31333133
sort_nona = numpy.argsort(self._data[~na_data_arr])
31343134
q = 0
31353135
for id, i in enumerate(sort):
3136-
if id not in list(sort[len(self._data) - na:]):
3137-
result[id] = sort_nona[id - q]
3138-
else:
3136+
if id in set(sort[len(self._data) - na:]):
31393137
q += 1
3138+
else:
3139+
result[id] = sort_nona[id - q]
31403140
for i in sort[len(self._data) - na:]:
31413141
result[i] = -1
31423142

sdc/tests/tests_perf/test_perf_series.py

Lines changed: 71 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,19 @@
2424
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
2525
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
# *****************************************************************************
27+
2728
import pandas as pd
2829
import numpy as np
2930

30-
from sdc.tests.test_utils import *
31-
from sdc.tests.tests_perf.test_perf_base import *
32-
from sdc.tests.tests_perf.test_perf_utils import *
31+
import time
32+
import random
33+
34+
import pandas
35+
import sdc
3336

37+
from .test_perf_base import TestBase
38+
from sdc.tests.test_utils import test_global_input_data_float64
39+
from .test_perf_utils import calc_compilation, get_times, perf_data_gen_fixed_len
3440

3541
def usecase_series_min(input_data):
3642
start_time = time.time()
@@ -214,8 +220,8 @@ def usecase_series_dropna(input_data):
214220
finish_time = time.time()
215221

216222
return finish_time - start_time, res
217-
218-
223+
224+
219225
def usecase_series_chain_add_and_sum(A, B):
220226
start_time = time.time()
221227
res = (A + B).sum()
@@ -224,7 +230,7 @@ def usecase_series_chain_add_and_sum(A, B):
224230

225231
return res_time, res
226232

227-
233+
228234
# python -m sdc.runtests sdc.tests.tests_perf.test_perf_series.TestSeriesMethods
229235
class TestSeriesMethods(TestBase):
230236
@classmethod
@@ -257,24 +263,44 @@ def setUpClass(cls):
257263
'series_chain_add_and_sum': [20 * 10 ** 7, 25 * 10 ** 7, 30 * 10 ** 7],
258264
}
259265

260-
def _test_series(self, pyfunc, name, input_data=None):
261-
input_data = input_data or test_global_input_data_float64
262-
full_input_data_length = sum(len(i) for i in input_data)
263-
hpat_func = sdc.jit(pyfunc)
264-
for data_length in self.total_data_length[name]:
265-
data = perf_data_gen_fixed_len(input_data, full_input_data_length, data_length)
266-
test_data = pd.Series(data)
266+
def _test_jitted(self, pyfunc, record, *args, **kwargs):
267+
# compilation time
268+
record["compile_results"] = calc_compilation(pyfunc, *args, **kwargs)
267269

268-
compile_results = calc_compilation(pyfunc, test_data, iter_number=self.iter_number)
269-
# Warming up
270-
hpat_func(test_data)
270+
sdc_func = sdc.jit(pyfunc)
271271

272-
exec_times, boxing_times = get_times(hpat_func, test_data, iter_number=self.iter_number)
273-
self.test_results.add(name, 'JIT', test_data.size, exec_times, boxing_results=boxing_times,
274-
compile_results=compile_results)
272+
# Warming up
273+
sdc_func(*args, **kwargs)
275274

276-
exec_times, _ = get_times(pyfunc, test_data, iter_number=self.iter_number)
277-
self.test_results.add(name, 'Reference', test_data.size, test_results=exec_times)
275+
# execution and boxing time
276+
record["test_results"], record["boxing_results"] = \
277+
get_times(sdc_func, *args, **kwargs)
278+
279+
def _test_python(self, pyfunc, record, *args, **kwargs):
280+
record["test_results"], _ = \
281+
get_times(pyfunc, *args, **kwargs)
282+
283+
def _test_case(self, pyfunc, name):
284+
input_data = test_global_input_data_float64
285+
full_input_data_length = sum(len(i) for i in input_data)
286+
for data_length in self.total_data_length[name]:
287+
base = {
288+
"test_name": name,
289+
"data_size": data_length,
290+
}
291+
data = perf_data_gen_fixed_len(input_data, full_input_data_length,
292+
data_length)
293+
test_data = pandas.Series(data)
294+
295+
record = base.copy()
296+
record["test_type"] = 'SDC'
297+
self._test_jitted(pyfunc, record, test_data)
298+
self.test_results.add(**record)
299+
300+
record = base.copy()
301+
record["test_type"] = 'Python'
302+
self._test_python(pyfunc, record, test_data)
303+
self.test_results.add(**record)
278304

279305
def _test_series_binary_operations(self, pyfunc, name, input_data=None):
280306
np.random.seed(0)
@@ -299,73 +325,74 @@ def _test_series_binary_operations(self, pyfunc, name, input_data=None):
299325
self.test_results.add(name, 'Reference', A.size, exec_times, num_threads=self.num_threads)
300326

301327
def test_series_float_min(self):
302-
self._test_series(usecase_series_min, 'series_min')
328+
self._test_case(usecase_series_min, 'series_min')
303329

304330
def test_series_float_max(self):
305-
self._test_series(usecase_series_max, 'series_max')
331+
self._test_case(usecase_series_max, 'series_max')
306332

307333
def test_series_float_abs(self):
308-
self._test_series(usecase_series_abs, 'series_abs')
334+
self._test_case(usecase_series_abs, 'series_abs')
309335

310336
def test_series_float_value_counts(self):
311-
self._test_series(usecase_series_value_counts, 'series_value_counts')
337+
self._test_case(usecase_series_value_counts, 'series_value_counts')
312338

313339
def test_series_float_nsmallest(self):
314-
self._test_series(usecase_series_nsmallest, 'series_nsmallest')
340+
self._test_case(usecase_series_nsmallest, 'series_nsmallest')
315341

316342
def test_series_float_nlargest(self):
317-
self._test_series(usecase_series_nlargest, 'series_nlargest')
343+
self._test_case(usecase_series_nlargest, 'series_nlargest')
318344

319345
def test_series_float_var(self):
320-
self._test_series(usecase_series_var, 'series_var')
346+
self._test_case(usecase_series_var, 'series_var')
321347

322348
def test_series_float_shift(self):
323-
self._test_series(usecase_series_shift, 'series_shift')
349+
self._test_case(usecase_series_shift, 'series_shift')
324350

325351
def test_series_float_copy(self):
326-
self._test_series(usecase_series_shift, 'series_copy')
352+
self._test_case(usecase_series_shift, 'series_copy')
327353

328354
def test_series_float_sum(self):
329-
self._test_series(usecase_series_sum, 'series_sum')
355+
self._test_case(usecase_series_sum, 'series_sum')
330356

331357
def test_series_float_idxmax(self):
332-
self._test_series(usecase_series_idxmax, 'series_idxmax')
358+
self._test_case(usecase_series_idxmax, 'series_idxmax')
333359

334360
def test_series_float_idxmin(self):
335-
self._test_series(usecase_series_idxmin, 'series_idxmin')
361+
self._test_case(usecase_series_idxmin, 'series_idxmin')
336362

337363
def test_series_float_prod(self):
338-
self._test_series(usecase_series_prod, 'series_prod')
364+
self._test_case(usecase_series_prod, 'series_prod')
339365

340366
def test_series_float_quantile(self):
341-
self._test_series(usecase_series_quantile, 'series_quantile')
367+
self._test_case(usecase_series_quantile, 'series_quantile')
342368

343369
def test_series_float_mean(self):
344-
self._test_series(usecase_series_quantile, 'series_mean')
370+
self._test_case(usecase_series_quantile, 'series_mean')
345371

346372
def test_series_float_unique(self):
347-
self._test_series(usecase_series_unique, 'series_unique')
373+
self._test_case(usecase_series_unique, 'series_unique')
348374

349375
def test_series_float_cumsum(self):
350-
self._test_series(usecase_series_cumsum, 'series_cumsum')
376+
self._test_case(usecase_series_cumsum, 'series_cumsum')
351377

352378
def test_series_float_nunique(self):
353-
self._test_series(usecase_series_nunique, 'series_nunique')
379+
self._test_case(usecase_series_nunique, 'series_nunique')
354380

355381
def test_series_float_count(self):
356-
self._test_series(usecase_series_count, 'series_count')
382+
self._test_case(usecase_series_count, 'series_count')
357383

358384
def test_series_float_median(self):
359-
self._test_series(usecase_series_median, 'series_median')
385+
self._test_case(usecase_series_median, 'series_median')
360386

361387
def test_series_float_argsort(self):
362-
self._test_series(usecase_series_argsort, 'series_argsort')
388+
self._test_case(usecase_series_argsort, 'series_argsort')
363389

364390
def test_series_float_sort_values(self):
365-
self._test_series(usecase_series_sort_values, 'series_sort_values')
391+
self._test_case(usecase_series_sort_values, 'series_sort_values')
366392

367393
def test_series_float_dropna(self):
368-
self._test_series(usecase_series_dropna, 'series_dropna')
394+
self._test_case(usecase_series_dropna, 'series_dropna')
369395

370396
def test_series_chain_add_and_sum(self):
371397
self._test_series_binary_operations(usecase_series_chain_add_and_sum, 'series_chain_add_and_sum')
398+

0 commit comments

Comments
 (0)