Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 72896c4

Browse files
authored
Scale max(skipna=True) (#541)
Create numpy_nanmax(). Move nanmax to sdc/functions/numpy_like.py. Add nan_min_max_factory(). Use numpy_like.nanmin(). Min tests. Correct implementation. Eliminates race condition. Add perf test for nanmin/max. Add tests for nanmin/max.
1 parent 12745a1 commit 72896c4

6 files changed

Lines changed: 141 additions & 31 deletions

File tree

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
create_str_arr_from_list, str_arr_set_na_by_mask)
6262
from sdc.utilities.utils import to_array, sdc_overload, sdc_overload_method, sdc_overload_attribute
6363
from sdc import sdc_autogenerated
64+
from sdc.functions import numpy_like
6465

6566
from .pandas_series_functions import apply
6667
from .pandas_series_functions import map as _map
@@ -3634,7 +3635,7 @@ def hpat_pandas_series_min_impl(self, axis=None, skipna=None, level=None, numeri
36343635
_skipna = skipna
36353636

36363637
if _skipna:
3637-
return numpy.nanmin(self._data)
3638+
return numpy_like.nanmin(self._data)
36383639

36393640
return self._data.min()
36403641

@@ -3715,7 +3716,7 @@ def hpat_pandas_series_max_impl(self, axis=None, skipna=None, level=None, numeri
37153716
_skipna = skipna
37163717

37173718
if _skipna:
3718-
return numpy.nanmax(self._data)
3719+
return numpy_like.nanmax(self._data)
37193720

37203721
return self._data.max()
37213722

sdc/functions/numpy_like.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,54 @@ def sdc_nansum_number_impl(self):
257257
return result
258258

259259
return sdc_nansum_number_impl
260+
261+
262+
def nanmin(a):
    """Placeholder for the NaN-ignoring minimum of ``a``.

    The pure-Python body is intentionally empty, so calling it directly
    returns ``None``. The real implementation is attached to this name
    further down in this module through ``sdc_overload``.
    """
264+
265+
266+
def nanmax(a):
    """Placeholder for the NaN-ignoring maximum of ``a``.

    The pure-Python body is intentionally empty, so calling it directly
    returns ``None``. The real implementation is attached to this name
    further down in this module through ``sdc_overload``.
    """
268+
269+
270+
def nan_min_max_overload_factory(reduce_op):
    """Build the overload body for a NaN-skipping ``min`` or ``max`` reduction.

    Parameters
    ----------
    reduce_op : callable
        The builtin ``min`` or the builtin ``max``. It is used both as the
        pairwise reduction inside the loop and as the key that selects the
        seed value (``+inf`` for ``min``, ``-inf`` for ``max``). Any other
        callable raises ``KeyError`` when a float/complex array is typed.

    Returns
    -------
    callable
        ``ov_impl(a)``, meant to be registered with ``sdc_overload``. For a
        ``types.Array`` argument it returns the implementation to compile;
        for anything else it returns ``None``.
    """
    def ov_impl(a):
        if not isinstance(a, types.Array):
            return

        if isinstance(a.dtype, (types.Float, types.Complex)):
            # NOTE(review): complex dtypes take this branch too; confirm that
            # ordering via min()/max() is actually supported for them.
            isnan = get_isnan(a.dtype)
            initial_result = {
                min: numpy.inf,
                max: -numpy.inf,
            }[reduce_op]

            def impl(a):
                # Seed with the identity of the reduction so that skipped
                # NaN elements never influence the result.
                result = initial_result
                nan_count = 0
                length = len(a)
                for i in prange(length):
                    v = a[i]
                    if not isnan(v):
                        result = reduce_op(result, v)
                    else:
                        nan_count += 1

                # Nothing but NaNs (this also covers a zero-length array,
                # where both counters are 0): the answer is NaN rather than
                # the +/-inf seed.
                if nan_count == length:
                    return numpy.nan

                return result
            return impl
        else:
            def impl(a):
                # Element types without NaN: plain min/max seeded with the
                # first element. That seed does not exist for an empty array,
                # so fail explicitly instead of indexing out of range.
                if len(a) == 0:
                    raise ValueError(
                        "zero-size array to reduction operation which has no identity")

                result = a[0]
                for i in prange(len(a) - 1):
                    result = reduce_op(result, a[i + 1])
                return result
            return impl

    return ov_impl
307+
308+
309+
# Attach the generated implementations to the nanmin/nanmax stubs:
# builtin min drives nanmin, builtin max drives nanmax.
sdc_overload(nanmin)(nan_min_max_overload_factory(min))
sdc_overload(nanmax)(nan_min_max_overload_factory(max))

sdc/tests/test_sdc_numpy.py

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -188,20 +188,47 @@ def sdc_impl(a):
188188
with self.subTest(data=case):
189189
np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
190190

191-
def test_sum(self):
191+
192+
class TestArrayReductions(TestCase):
193+
194+
def check_reduction_basic(self, pyfunc, alt_pyfunc, all_nans=True):
    """Compare a jitted reduction with its reference on a fixed set of arrays.

    ``alt_pyfunc`` is compiled with ``self.jit`` and, for every sample array,
    its result is compared to ``pyfunc`` via
    ``np.testing.assert_array_equal`` inside its own ``subTest``.

    Parameters
    ----------
    pyfunc : callable
        Reference implementation, called as-is.
    alt_pyfunc : callable
        Implementation under test, compiled before use.
    all_nans : bool, optional
        When true (the default) an array holding only NaNs is checked too.
    """
    compiled = self.jit(alt_pyfunc)

    samples = [
        np.array([5, 2, 0, 333, -4]),
        np.array([3.3, 5.4, np.nan, 7.9, np.nan]),
        np.float64([1.0, 2.0, 0.0, -0.0, 1.0, -1.5]),
        np.float64([-0.0, -1.5]),
        np.float64([-1.5, 2.5, 'inf']),
        np.float64([-1.5, 2.5, '-inf']),
        np.float64([-1.5, 2.5, 'inf', '-inf']),
        np.float64(['nan', -1.5, 2.5, 'nan', 3.0]),
        np.float64(['nan', -1.5, 2.5, 'nan', 'inf', '-inf', 3.0]),
    ]
    if all_nans:
        # Only NaNs
        samples.append(np.float64(['nan', 'nan']))

    for sample in samples:
        with self.subTest(data=sample):
            np.testing.assert_array_equal(compiled(sample), pyfunc(sample))
214+
215+
def test_nanmin(self):
192216
def ref_impl(a):
193-
return np.sum(a)
217+
return np.nanmin(a)
194218

195219
def sdc_impl(a):
196-
return numpy_like.sum(a)
220+
return numpy_like.nanmin(a)
197221

198-
sdc_func = self.jit(sdc_impl)
222+
self.check_reduction_basic(ref_impl, sdc_impl)
199223

200-
cases = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]]
201-
for case in cases:
202-
a = np.array(case)
203-
with self.subTest(data=case):
204-
np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
224+
def test_nanmax(self):
225+
def ref_impl(a):
226+
return np.nanmax(a)
227+
228+
def sdc_impl(a):
229+
return numpy_like.nanmax(a)
230+
231+
self.check_reduction_basic(ref_impl, sdc_impl)
205232

206233
def test_nansum(self):
207234
def ref_impl(a):
@@ -210,13 +237,13 @@ def ref_impl(a):
210237
def sdc_impl(a):
211238
return numpy_like.nansum(a)
212239

213-
sdc_func = self.jit(sdc_impl)
240+
self.check_reduction_basic(ref_impl, sdc_impl)
214241

215-
cases = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]]
216-
for case in cases:
217-
a = np.array(case)
218-
with self.subTest(data=case):
219-
np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
242+
def test_sum(self):
243+
def ref_impl(a):
244+
return np.sum(a)
245+
246+
def sdc_impl(a):
247+
return numpy_like.sum(a)
220248

221-
if __name__ == "__main__":
222-
unittest.main()
249+
self.check_reduction_basic(ref_impl, sdc_impl)

sdc/tests/test_series.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2470,9 +2470,12 @@ def test_impl(S):
24702470
hpat_func = self.jit(test_impl)
24712471

24722472
# TODO type_min/type_max
2473-
for input_data in [[np.nan, 2., np.nan, 3., np.inf, 1, -1000],
2474-
[8, 31, 1123, -1024],
2475-
[2., 3., 1, -1000, np.inf]]:
2473+
for input_data in [
2474+
[np.nan, 2., np.nan, 3., np.inf, 1, -1000],
2475+
[8, 31, 1123, -1024],
2476+
[2., 3., 1, -1000, np.inf],
2477+
[np.nan, np.nan, np.inf, np.nan],
2478+
]:
24762479
S = pd.Series(input_data)
24772480

24782481
result_ref = test_impl(S)
@@ -2500,14 +2503,32 @@ def test_impl(S):
25002503
hpat_func = self.jit(test_impl)
25012504

25022505
# TODO type_min/type_max
2503-
for input_data in [[np.nan, 2., np.nan, 3., np.inf, 1, -1000],
2504-
[8, 31, 1123, -1024],
2505-
[2., 3., 1, -1000, np.inf]]:
2506-
S = pd.Series(input_data)
2506+
for input_data in [
2507+
[np.nan, 2., np.nan, 3., np.inf, 1, -1000],
2508+
[8, 31, 1123, -1024],
2509+
[2., 3., 1, -1000, np.inf],
2510+
[np.inf, np.inf, np.inf, np.inf],
2511+
[np.inf, np.nan, np.nan, np.nan],
25072512

2508-
result_ref = test_impl(S)
2509-
result = hpat_func(S)
2510-
self.assertEqual(result, result_ref)
2513+
[np.nan, np.nan, np.nan, np.nan],
2514+
[np.nan, 1.0, np.nan, np.nan],
2515+
[np.nan, 1.0, 1.0, np.nan],
2516+
2517+
[np.nan, np.nan, 1.0, np.nan],
2518+
[np.nan, np.nan, 1.0, np.nan, np.nan],
2519+
2520+
[np.nan, np.nan, np.inf, np.nan],
2521+
[np.nan, np.nan, np.inf, np.nan, np.nan],
2522+
2523+
[np.nan, np.nan, np.nan, np.inf],
2524+
np.arange(11),
2525+
]:
2526+
with self.subTest(data=input_data):
2527+
S = pd.Series(input_data)
2528+
2529+
result_ref = test_impl(S)
2530+
result = hpat_func(S)
2531+
np.testing.assert_equal(result, result_ref)
25112532

25122533
@skip_sdc_jit("Series.max() any parameters unsupported")
25132534
def test_series_max_param(self):

sdc/tests/tests_perf/test_perf_numpy.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,16 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
113113
CE(type_='Numba', code='np.sum(data)', jitted=True),
114114
CE(type_='SDC', code='sdc.functions.numpy_like.sum(data)', jitted=True),
115115
], usecase_params='data'),
116+
TC(name='nanmin', size=[10 ** 7], call_expr=[
117+
CE(type_='Python', code='np.nanmin(data)', jitted=False),
118+
CE(type_='Numba', code='np.nanmin(data)', jitted=True),
119+
CE(type_='SDC', code='sdc.functions.numpy_like.nanmin(data)', jitted=True),
120+
], usecase_params='data'),
121+
TC(name='nanmax', size=[10 ** 7], call_expr=[
122+
CE(type_='Python', code='np.nanmax(data)', jitted=False),
123+
CE(type_='Numba', code='np.nanmax(data)', jitted=True),
124+
CE(type_='SDC', code='sdc.functions.numpy_like.nanmax(data)', jitted=True),
125+
], usecase_params='data'),
116126
]
117127

118128
generate_test_cases(cases, TestFunctions, 'function')

sdc/tests/tests_perf/test_perf_series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
115115
TC(name='max', size=[10 ** 8], params='skipna=False'),
116116
TC(name='mean', size=[10 ** 8]),
117117
TC(name='median', size=[10 ** 8]),
118-
TC(name='min', size=[10 ** 8]),
119-
TC(name='min', size=[10 ** 7], params='skipna=False'),
118+
TC(name='min', size=[10 ** 8], params='skipna=True'),
119+
TC(name='min', size=[10 ** 8], params='skipna=False'),
120120
TC(name='mod', size=[10 ** 7], params='other', data_num=2),
121121
TC(name='mul', size=[10 ** 7], params='other', data_num=2),
122122
TC(name='ndim', size=[10 ** 7], call_expr='data.ndim', usecase_params='data'),

0 commit comments

Comments
 (0)