Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 12745a1

Browse files
authored
Impl isnan/notnan numpy-like (#584)
* Impl isnan/notnan numpy-like * fix optimization * fix notnan
1 parent c546192 commit 12745a1

3 files changed

Lines changed: 139 additions & 4 deletions

File tree

sdc/functions/numpy_like.py

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,22 @@
4040

4141
import sdc
4242
from sdc.utilities.sdc_typing_utils import TypeChecker
43-
from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size)
43+
from sdc.str_arr_ext import (StringArrayType, pre_alloc_string_array, get_utf8_size, str_arr_is_na)
4444
from sdc.utilities.utils import sdc_overload, sdc_register_jitable
4545

4646

4747
def astype(self, dtype):
    """Pure-Python placeholder for the numpy-like ``astype``.

    The body intentionally does nothing and returns None.
    NOTE(review): presumably the compiled implementation is attached with
    ``sdc_overload`` like the sibling stubs in this module - confirm.
    """
    return None
4949

5050

51+
def isnan(self):
    """Pure-Python placeholder for the numpy-like ``isnan``.

    The body intentionally does nothing and returns None; the compiled
    implementation is attached to this name with ``@sdc_overload(isnan)``.
    """
    return None
53+
54+
55+
def notnan(self):
    """Pure-Python placeholder for the numpy-like ``notnan``.

    The body intentionally does nothing and returns None; the compiled
    implementation is attached to this name with ``@sdc_overload(notnan)``.
    """
    return None
57+
58+
5159
def sum(self):
    """Pure-Python placeholder for the numpy-like ``sum``.

    The body intentionally does nothing and returns None; the compiled
    implementation is attached to this name with ``@sdc_overload(sum)``.
    """
    return None
5361

@@ -117,6 +125,82 @@ def sdc_astype_number_impl(self, dtype):
117125
ty_checker.raise_exc(self.dtype, 'str or type', 'self.dtype')
118126

119127

128+
@sdc_overload(notnan)
def sdc_notnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel element-wise "is not NaN" check, i.e. the inverse of numpy.isnan.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k notnan

    Parameters
    ----------
    self
        Type of the argument being compiled. Only ``types.Array`` is handled.

    Returns
    -------
    The implementation function for integer or float arrays, or None when
    ``self`` is not a ``types.Array`` so that this overload does not apply.

    An unsupported element type is reported through ``ty_checker.raise_exc``.
    """

    if not isinstance(self, types.Array):
        return None

    # Created locally so that the unsupported-dtype path below reports a
    # proper type error instead of failing on an undefined name.
    ty_checker = TypeChecker("numpy-like 'notnan'")
    dtype = self.dtype

    if isinstance(dtype, types.Integer):
        def sdc_notnan_int_impl(self):
            # No per-element check is performed for integer arrays:
            # every element is reported as "not NaN".
            return numpy.ones(shape=len(self), dtype=numpy.bool_)

        return sdc_notnan_int_impl

    if isinstance(dtype, types.Float):
        # Only the float path uses the NaN predicate, so resolve it here.
        isnan = get_isnan(dtype)

        def sdc_notnan_float_impl(self):
            length = len(self)
            res = numpy.empty(shape=length, dtype=numpy.bool_)
            for i in prange(length):
                res[i] = not isnan(self[i])

            return res

        return sdc_notnan_float_impl

    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
164+
165+
166+
@sdc_overload(isnan)
def sdc_isnan_overload(self):
    """
    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Parallel replacement of numpy.isnan.

    .. only:: developer
       Test: python -m sdc.runtests sdc.tests.test_sdc_numpy -k isnan

    Parameters
    ----------
    self
        Type of the argument being compiled. Only ``types.Array`` is handled.

    Returns
    -------
    The implementation function for integer or float arrays, or None when
    ``self`` is not a ``types.Array`` so that this overload does not apply.

    An unsupported element type is reported through ``ty_checker.raise_exc``.
    """

    if not isinstance(self, types.Array):
        return None

    # Created locally so that the unsupported-dtype path below reports a
    # proper type error instead of failing on an undefined name.
    ty_checker = TypeChecker("numpy-like 'isnan'")
    dtype = self.dtype

    if isinstance(dtype, types.Integer):
        def sdc_isnan_int_impl(self):
            # No per-element check is performed for integer arrays:
            # every element is reported as "not NaN" (all False).
            return numpy.zeros(shape=len(self), dtype=numpy.bool_)

        return sdc_isnan_int_impl

    if isinstance(dtype, types.Float):
        # Only the float path uses the NaN predicate, so resolve it here.
        isnan = get_isnan(dtype)

        def sdc_isnan_float_impl(self):
            length = len(self)
            res = numpy.empty(shape=length, dtype=numpy.bool_)
            for i in prange(length):
                res[i] = isnan(self[i])

            return res

        return sdc_isnan_float_impl

    ty_checker.raise_exc(dtype, 'int or float', 'self.dtype')
202+
203+
120204
@sdc_overload(sum)
121205
def sdc_sum_overload(self):
122206
"""

sdc/tests/test_sdc_numpy.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,43 @@ def sdc_impl(a, t):
142142
with self.subTest(data=case, type=type_):
143143
np.testing.assert_array_equal(sdc_func(a, type_), ref_impl(a, type_))
144144

145-
def test_nansum(self):
145+
def test_isnan(self):
    """Compare numpy_like.isnan against np.isnan on an int and a float array."""
    def ref_impl(arr):
        return np.isnan(arr)

    def sdc_impl(arr):
        return numpy_like.isnan(arr)

    sdc_func = self.jit(sdc_impl)

    int_case = [5, 2, 0, 333, -4]
    float_case = [3.3, 5.4, np.nan, 7.9, np.nan]
    for case in (int_case, float_case):
        arr = np.array(case)
        with self.subTest(data=case):
            expected = ref_impl(arr)
            actual = sdc_func(arr)
            np.testing.assert_array_equal(actual, expected)
159+
160+
@unittest.skip('Needs provide String Array boxing')
def test_isnan_str(self):
    """Compare numpy_like.isnan against np.isnan on arrays built from strings.

    Currently skipped (see the decorator reason).
    NOTE(review): np.isnan is presumably not applicable to string/object
    arrays, so the reference below may need replacing when the skip is
    removed - confirm.
    """
    def ref_impl(arr):
        return np.isnan(arr)

    def sdc_impl(arr):
        return numpy_like.isnan(arr)

    sdc_func = self.jit(sdc_impl)

    nan_case = ['a', 'cc', np.nan]
    none_case = ['se', None, 'vvv']
    for case in (nan_case, none_case):
        arr = np.array(case)
        with self.subTest(data=case):
            expected = ref_impl(arr)
            actual = sdc_func(arr)
            np.testing.assert_array_equal(actual, expected)
175+
176+
def test_notnan(self):
177+
def ref_impl(a):
178+
return np.invert(np.isnan(a))
179+
180+
def sdc_impl(a):
181+
return numpy_like.notnan(a)
151182

152183
sdc_func = self.jit(sdc_impl)
153184

@@ -172,5 +203,20 @@ def sdc_impl(a):
172203
with self.subTest(data=case):
173204
np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
174205

206+
def test_nansum(self):
    """Compare numpy_like.nansum against np.nansum on an int and a float array."""
    def ref_impl(arr):
        return np.nansum(arr)

    def sdc_impl(arr):
        return numpy_like.nansum(arr)

    sdc_func = self.jit(sdc_impl)

    int_case = [5, 2, 0, 333, -4]
    float_case = [3.3, 5.4, np.nan, 7.9, np.nan]
    for case in (int_case, float_case):
        arr = np.array(case)
        with self.subTest(data=case):
            expected = ref_impl(arr)
            actual = sdc_func(arr)
            np.testing.assert_array_equal(actual, expected)
220+
175221
if __name__ == "__main__":
176222
unittest.main()

sdc/tests/tests_perf/test_perf_numpy.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
9999
CE(type_='Numba', code='data.astype(np.int64)', jitted=True),
100100
CE(type_='SDC', code='sdc.functions.numpy_like.astype(data, np.int64)', jitted=True),
101101
], usecase_params='data'),
102+
TC(name='isnan', size=[10 ** 7], call_expr=[
103+
CE(type_='Python', code='np.isnan(data)', jitted=False),
104+
CE(type_='Numba', code='np.isnan(data)', jitted=True),
105+
CE(type_='SDC', code='sdc.functions.numpy_like.isnan(data)', jitted=True),
106+
], usecase_params='data'),
102107
TC(name='nansum', size=[10 ** 7], call_expr=[
103108
CE(type_='Python', code='np.nansum(data)', jitted=False),
104109
CE(type_='SDC', code='sdc.functions.numpy_like.nansum(data)', jitted=True),

0 commit comments

Comments
 (0)