Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 78b03a1

Browse files
Sum/nansum numpy-like impl (#570)
* Sum/nansum impl * Use numba isnan dtype Co-authored-by: Alexander Kalistratov <alexander.kalistratov@intel.com>
1 parent 1b1a7bc commit 78b03a1

3 files changed

Lines changed: 106 additions & 0 deletions

File tree

sdc/functions/numpy_like.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
from numba import types, jit, prange, numpy_support, literally
3838
from numba.errors import TypingError
39+
from numba.targets.arraymath import get_isnan
3940

4041
import sdc
4142
from sdc.utilities.sdc_typing_utils import TypeChecker
@@ -47,6 +48,14 @@ def astype(self, dtype):
4748
pass
4849

4950

51+
def sum(self):
    """Placeholder for the numpy-like ``sum``.

    The body is intentionally empty, so a direct Python call returns None.
    The compiled behavior is supplied by the overload registered against
    this function with ``sdc_overload``.
    """
53+
54+
55+
def nansum(self):
    """Placeholder for the numpy-like ``nansum``.

    The body is intentionally empty, so a direct Python call returns None.
    The compiled behavior is supplied by the overload registered against
    this function with ``sdc_overload``.
    """
57+
58+
5059
@sdc_overload(astype)
5160
def sdc_astype_overload(self, dtype):
5261
"""
@@ -106,3 +115,61 @@ def sdc_astype_number_impl(self, dtype):
106115
return sdc_astype_number_impl
107116

108117
ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype')
118+
119+
120+
@sdc_overload(sum)
def sdc_sum_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.sum.

    Parameters
    ----------
    self:
        Numba type of the argument. Only ``types.Array`` with a
        ``types.Number`` dtype is handled.

    Returns
    -------
    The implementation function for supported types, otherwise None so
    that the overload is declined for this argument type.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k sum
    """

    # Check the container type first: only array types are guaranteed to
    # carry the dtype attribute inspected below.
    if not isinstance(self, types.Array):
        return None

    if not isinstance(self.dtype, types.Number):
        return None

    def sdc_sum_number_impl(self):
        # No explicit NaN test is needed: NaN propagates through addition,
        # so the reduction already yields NaN when any element is NaN, as
        # numpy.sum does. Keeping the loop free of an early return leaves
        # it a plain prange reduction and keeps integer sums integer-typed.
        result = 0
        for i in prange(len(self)):
            result += self[i]

        return result

    return sdc_sum_number_impl
148+
149+
150+
@sdc_overload(nansum)
def sdc_nansum_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.nansum.

    Parameters
    ----------
    self:
        Numba type of the argument. Only ``types.Array`` with a
        ``types.Number`` dtype is handled.

    Returns
    -------
    The implementation function for supported types, otherwise None so
    that the overload is declined for this argument type.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k nansum
    """

    # Check the container type first: only array types are guaranteed to
    # carry the dtype attribute inspected below.
    if not isinstance(self, types.Array):
        return None

    dtype = self.dtype
    if not isinstance(dtype, types.Number):
        return None

    # NaN predicate selected for this dtype at compile time; it is captured
    # by the implementation below instead of calling numpy.isnan per element.
    isnan = get_isnan(dtype)

    def sdc_nansum_number_impl(self):
        result = 0
        for i in prange(len(self)):
            value = self[i]
            # Skip NaN elements so they do not contribute to the total.
            if not isnan(value):
                result += value

        return result

    return sdc_nansum_number_impl

sdc/tests/test_sdc_numpy.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,5 +142,35 @@ def sdc_impl(a, t):
142142
with self.subTest(data=case, type=type_):
143143
np.testing.assert_array_equal(sdc_func(a, type_), ref_impl(a, type_))
144144

145+
def test_nansum(self):
    """Compare numpy_like.nansum (jitted) with np.nansum on int and NaN-bearing float data."""
    def expected_of(arr):
        return np.nansum(arr)

    def under_test(arr):
        return numpy_like.nansum(arr)

    compiled = self.jit(under_test)

    inputs = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]]
    for values in inputs:
        arr = np.array(values)
        with self.subTest(data=values):
            actual = compiled(arr)
            expected = expected_of(arr)
            np.testing.assert_array_equal(actual, expected)
159+
160+
def test_sum(self):
    """Compare numpy_like.sum (jitted) with np.sum on int and NaN-bearing float data."""
    def expected_of(arr):
        return np.sum(arr)

    def under_test(arr):
        return numpy_like.sum(arr)

    compiled = self.jit(under_test)

    inputs = [[5, 2, 0, 333, -4], [3.3, 5.4, np.nan, 7.9, np.nan]]
    for values in inputs:
        arr = np.array(values)
        with self.subTest(data=values):
            actual = compiled(arr)
            expected = expected_of(arr)
            np.testing.assert_array_equal(actual, expected)
174+
145175
if __name__ == "__main__":
146176
unittest.main()

sdc/tests/tests_perf/test_perf_numpy.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,15 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
9999
CE(type_='Numba', code='data.astype(np.int64)', jitted=True),
100100
CE(type_='SDC', code='sdc.functions.numpy_like.astype(data, np.int64)', jitted=True),
101101
], usecase_params='data'),
102+
TC(name='nansum', size=[10 ** 7], call_expr=[
103+
CE(type_='Python', code='np.nansum(data)', jitted=False),
104+
CE(type_='SDC', code='sdc.functions.numpy_like.nansum(data)', jitted=True),
105+
], usecase_params='data'),
106+
TC(name='sum', size=[10 ** 7], call_expr=[
107+
CE(type_='Python', code='np.sum(data)', jitted=False),
108+
CE(type_='Numba', code='np.sum(data)', jitted=True),
109+
CE(type_='SDC', code='sdc.functions.numpy_like.sum(data)', jitted=True),
110+
], usecase_params='data'),
102111
]
103112

104113
generate_test_cases(cases, TestFunctions, 'function')

0 commit comments

Comments
 (0)