@@ -432,12 +432,19 @@ def hpat_pandas_series_getitem_idx_series_impl(self, idx):
432432
433433
434434@sdc_overload (operator .setitem )
435- def hpat_pandas_series_setitem (self , idx , value ):
435+ def sdc_pandas_series_setitem (self , idx , value ):
436436 """
437437 Intel Scalable Dataframe Compiler User Guide
438438 ********************************************
439439 Pandas API: pandas.Series.__setitem__
440440
441+ Limitations
442+ -----------
443+ Not supported for idx as a string slice, e.g. S['a':'f'] = value
444+ Not supported for string series
445+ Not supported for a case of setting value for non existing index
446+ Not supported for cases when setting causes change of the Series dtype
447+
441448 Examples
442449 --------
443450 .. literalinclude:: ../../../examples/series_setitem_int.py
@@ -477,56 +484,278 @@ def hpat_pandas_series_setitem(self, idx, value):
477484
478485 Intel Scalable Dataframe Compiler Developer Guide
479486 *************************************************
480- Pandas Series operator :attr:`pandas.Series.set` implementation
487+ Pandas Series operator :attr:`pandas.Series.__setitem__` implementation
481488
482489 Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_setitem*
483490
484491 Parameters
485492 ----------
486493 series: :obj:`pandas.Series`
487494 input series
488- idx: :obj:`int `, :obj:`slice` or :obj:`pandas.Series`
495+ idx: :obj:`scalar `, :obj:`slice`, :obj:`array ` or :obj:`pandas.Series`
489496 input index
490- value: :object
497+ value: :obj:`scalar`, :obj:`array` or :obj:`pandas.Series`
491498 input value
492499
493500 Returns
494501 -------
495- :class:`pandas.Series` or an element of the underneath type
502+ :class:`pandas.Series`
496503 object of :class:`pandas.Series`
497504 """
498505
499- ty_checker = TypeChecker ('Operator setitem.' )
506+ _func_name = 'Operator setitem().'
507+ ty_checker = TypeChecker (_func_name )
500508 ty_checker .check (self , SeriesType )
501509
502- if not (isinstance (idx , (types .Integer , types .SliceType , SeriesType ))):
503- ty_checker .raise_exc (idx , 'int, Slice, Series' , 'idx' )
510+ if not (isinstance (idx , (types .Number , types .UnicodeType , types .SliceType , types .Array , SeriesType ))):
511+ ty_checker .raise_exc (idx , 'scalar, Slice, Array or Series' , 'idx' )
512+
513+ all_supported_scalar_types = (types .Number , types .UnicodeType , types .Boolean )
514+ if not (isinstance (value , all_supported_scalar_types ) or isinstance (value , (SeriesType , types .Array ))):
515+ ty_checker .raise_exc (value , 'scalar, Array or Series' , 'value' )
516+
517+ if not check_types_comparable (self , value ):
518+ msg = '{} The value and Series data must be comparable. Given: self.dtype={}, value={}'
519+ raise TypingError (msg .format (_func_name , self .dtype , value ))
520+
521+ # idx is not necessarily of the same dtype as self.index, e.g. it might be a Boolean indexer or a Slice
522+ if not (check_types_comparable (idx , self .index )
523+ or isinstance (idx , (types .Integer , types .SliceType ))
524+ or (isinstance (idx , (SeriesType , types .Array )) and isinstance (idx .dtype , (types .Integer , types .Boolean )))):
525+ msg = '{} The idx is not comparable to Series index, not a Boolean or integer indexer or a Slice. ' + \
526+ 'Given: self.index={}, idx={}'
527+ raise TypingError (msg .format (_func_name , self .index , idx ))
528+
529+ value_is_series = isinstance (value , SeriesType )
530+ value_is_array = isinstance (value , types .Array )
531+
532+ # for many cases pandas setitem assigns values along positions in self._data
533+ # not considering Series index, so a common implementation exists
534+ idx_is_boolean_array = isinstance (idx , types .Array ) and isinstance (idx .dtype , types .Boolean )
535+ idx_is_boolean_series = isinstance (idx , SeriesType ) and isinstance (idx .dtype , types .Boolean )
536+ idx_and_self_index_comparable = check_types_comparable (self .index , idx )
537+ self_index_is_none = isinstance (self .index , types .NoneType )
538+ assign_along_positions = ((self_index_is_none
539+ or isinstance (idx , types .SliceType )
540+ or not idx_and_self_index_comparable )
541+ and not idx_is_boolean_series
542+ and not idx_is_boolean_array )
543+
544+ idx_is_scalar = isinstance (idx , (types .Number , types .UnicodeType ))
545+ if assign_along_positions or idx_is_scalar :
546+
547+ idx_is_numeric_or_boolean_series = (isinstance (idx , SeriesType )
548+ and isinstance (idx .dtype , (types .Number , types .Boolean )))
549+ assign_via_idx_mask = idx_is_scalar and idx_and_self_index_comparable
550+ assign_via_idx_data = idx_is_numeric_or_boolean_series and not idx_and_self_index_comparable
551+
def sdc_pandas_series_setitem_no_reindexing_impl(self, idx, value):
    """Write ``value`` into ``self._data`` without aligning on index labels.

    The flags ``assign_via_idx_mask``, ``assign_via_idx_data`` and
    ``value_is_series`` are free variables taken from the enclosing scope;
    they select how the indexer and the assigned data are derived.

    Returns ``self`` after the in-place assignment.
    """
    # NOTE(review): the explicit ``== True`` comparisons are kept as in the
    # original - presumably they let the compiler drop the untaken branches;
    # confirm before simplifying.
    if assign_via_idx_mask == True:  # noqa
        # element-wise comparison of the Series index with idx gives a mask
        target = self._index == idx
    elif assign_via_idx_data == True:  # noqa
        # idx is a Series: index with its underlying data
        target = idx._data
    else:
        target = idx

    if value_is_series == True:  # noqa
        payload = value._data
    else:
        payload = value

    self._data[target] = payload
    return self
517565
518- return hpat_pandas_series_setitem_idx_integer_impl
566+ return sdc_pandas_series_setitem_no_reindexing_impl
567+
568+ if (idx_is_boolean_array or idx_is_boolean_series ) and value_is_series :
569+
570+ self_index_dtype = types .int64 if isinstance (self .index , types .NoneType ) else self .index .dtype
571+ value_index_dtype = types .int64 if isinstance (value .index , types .NoneType ) else value .index .dtype
572+ if (isinstance (self_index_dtype , types .Number ) and isinstance (value_index_dtype , types .Number )):
573+ indexes_common_dtype = find_common_dtype_from_numpy_dtypes ([self_index_dtype , value_index_dtype ], [])
574+ elif (isinstance (self_index_dtype , types .UnicodeType ) and isinstance (value_index_dtype , types .UnicodeType )):
575+ indexes_common_dtype = types .unicode_type
576+ else :
577+ msg = '{} The self and value indexes must be comparable. Given: self.dtype={}, value.dtype={}'
578+ raise TypingError (msg .format (_func_name , self_index_dtype , value_index_dtype ))
579+
580+ if idx_is_boolean_array :
581+
def sdc_pandas_series_setitem_idx_bool_array_align_impl(self, idx, value):
    """Assign ``value`` to the elements of ``self`` selected by the Boolean array ``idx``.

    When ``value`` is a Series (``value_is_series`` from the enclosing scope):

    * if ``self.index`` or ``value.index`` holds duplicate labels, the data of
      ``value`` is assigned along the positions selected by ``idx``;
    * otherwise each selected label of ``self.index`` is looked up in
      ``value.index``: on a match the corresponding element of ``value`` is
      copied, on a miss ``sdc.hiframes.join.setitem_arr_nan`` is called for
      that position.

    When ``value`` is not a Series it is assigned along the mask directly.

    Returns ``self`` after the in-place assignment.
    """
    if value_is_series == True:  # noqa
        self_labels = self.index
        value_labels = value.index

        # an index has duplicates when its set of labels is smaller than the index itself
        self_has_duplicates = len(set(self_labels)) != len(self_labels)
        value_has_duplicates = len(set(value_labels)) != len(value_labels)

        if not (self_has_duplicates or value_has_duplicates):
            # both indexes are unique: value.index is used as a reindexer
            value_label_to_position = Dict.empty(
                key_type=indexes_common_dtype,
                value_type=types.int32
            )
            for pos, value_label in enumerate(value_labels):
                value_label_to_position[value_label] = types.int32(pos)

            # such iterative setitem on a StringArray will be inefficient
            # TODO: refactor this when str_arr setitem is fully supported
            for i in numba.prange(len(self_labels)):
                if idx[i]:
                    self_label = self_labels[i]
                    if self_label in value_label_to_position:
                        self._data[i] = value._data[value_label_to_position[self_label]]
                    else:
                        sdc.hiframes.join.setitem_arr_nan(self._data, i)
        else:
            # duplicates present: assignment is made via positions
            self._data[idx] = value._data
    else:
        # value has no index - nothing to reindex, assign along the idx mask
        self._data[idx] = value

    return self
528620
529- return hpat_pandas_series_setitem_idx_series_impl
621+ return sdc_pandas_series_setitem_idx_bool_array_align_impl
622+
623+ elif idx_is_boolean_series :
624+
def sdc_pandas_series_setitem_idx_bool_series_align_impl(self, idx, value):
    """Assign ``value`` to the elements of ``self`` selected by the Boolean Series ``idx``.

    The data of ``idx`` masks the index of ``idx``; the surviving labels are
    then located in ``self.index`` and, when ``value`` is a Series
    (``value_is_series`` from the enclosing scope), also in ``value.index``.

    Raises
    ------
    ValueError
        If the sorted labels of ``self.index`` and ``idx.index`` differ, or
        (Series ``value`` only) if the selected labels contain duplicates.

    Returns ``self`` after the in-place assignment.
    """
    self_labels = self.index
    idx_labels = idx.index

    # FIXME: for now just use sorted, as == is not implemented for sets of unicode strings
    if sorted(self_labels) != sorted(idx_labels):
        raise ValueError("Unalignable boolean Series provided as indexer "
                         "(index of the boolean Series and of the indexed object do not match)")

    # labels of idx for which the Boolean data is True
    selected_labels = idx_labels[idx._data]
    selected_labels_set = set(selected_labels)

    if value_is_series == True:  # noqa

        if len(selected_labels_set) != len(selected_labels):
            raise ValueError("cannot reindex from a duplicate axis")

        # label -> position lookup for self
        self_label_to_position = Dict.empty(
            key_type=indexes_common_dtype,
            value_type=types.int32
        )
        for pos, label in enumerate(self_labels):
            self_label_to_position[label] = types.int32(pos)

        # label -> position lookup for value
        value_label_to_position = Dict.empty(
            key_type=indexes_common_dtype,
            value_type=types.int32
        )
        for pos, label in enumerate(value.index):
            value_label_to_position[label] = types.int32(pos)

        # every selected label receives the element of value carrying the same
        # label; labels absent from value.index go through setitem_arr_nan
        for k in numba.prange(len(selected_labels)):
            selected_label = selected_labels[k]
            self_position = self_label_to_position[selected_label]
            if selected_label in value_label_to_position:
                value_position = value_label_to_position[selected_label]
                self._data[self_position] = value._data[value_position]
            else:
                sdc.hiframes.join.setitem_arr_nan(self._data, self_position)
    else:
        # build a positional mask over self.index from the selected labels, so
        # the order of the selected labels within self.index does not matter
        self_size = len(self_labels)
        positions_mask = numpy.zeros(self_size, dtype=numpy.bool_)
        for k in numba.prange(self_size):
            if self_labels[k] in selected_labels_set:
                positions_mask[k] = True
        self._data[positions_mask] = value

    return self
680+
681+ return sdc_pandas_series_setitem_idx_bool_series_align_impl
682+
683+ elif isinstance (idx , (SeriesType , types .Array )) and idx_and_self_index_comparable :
684+
685+ # idx is numeric Series or array comparable with self.index, hence reindexing is possible
686+ if isinstance (self .index .dtype , types .Number ):
687+
688+ idx_is_series = isinstance (idx , SeriesType )
689+ value_is_scalar = not (value_is_series or value_is_array )
def sdc_pandas_series_setitem_idx_int_series_align_impl(self, idx, value):
    """Assign ``value`` to the elements of ``self`` whose index labels are listed in ``idx``.

    ``idx`` is a Series or an array of labels and ``value`` is a scalar, an
    array or a Series; the flags ``idx_is_series``, ``value_is_series`` and
    ``value_is_scalar`` come from the enclosing scope. For a non-scalar
    ``value`` the element paired with a label is the one at the position that
    label occupies in ``idx``.

    Raises
    ------
    ValueError
        If the matched labels of ``self`` are not unique, or if the number of
        matched positions differs from the length of ``idx``.

    Returns ``self`` after the in-place assignment.
    """
    _idx = idx._data if idx_is_series == True else idx  # noqa
    _value = value._data if value_is_series == True else value  # noqa

    self_index_size = len(self._index)
    idx_size = len(_idx)

    # valid_indices[i] holds the (last) position in idx whose label equals
    # self._index[i], or -1 when the label is not requested
    valid_indices = numpy.repeat(-1, self_index_size)
    for i in numba.prange(self_index_size):
        # plain range: no temporary array is allocated per outer iteration
        for j in range(idx_size):
            if self._index[i] == _idx[j]:
                valid_indices[i] = j

    # the "label was found" mask is computed once and reused
    found_mask = valid_indices != -1
    valid_indices_positions = numpy.arange(self_index_size)[found_mask]
    valid_indices_masked = valid_indices[found_mask]

    indexes_found = self._index[valid_indices_positions]
    if len(numpy.unique(indexes_found)) != len(indexes_found):
        raise ValueError("Reindexing only valid with uniquely valued Index objects")

    if len(valid_indices_masked) != idx_size:
        raise ValueError("Reindexing not possible: idx has index not found in Series")

    if value_is_scalar == True:  # noqa
        self._data[valid_indices_positions] = _value
    else:
        # reorder value so each element lands on the position of its label
        self._data[valid_indices_positions] = numpy.take(_value, valid_indices_masked)

    return self
719+
720+ return sdc_pandas_series_setitem_idx_int_series_align_impl
721+
722+ elif isinstance (self .index .dtype , types .UnicodeType ):
723+
def sdc_pandas_series_setitem_idx_str_series_align_impl(self, idx, value):
    """Assign ``value`` to the elements of ``self`` whose string labels are listed in ``idx``.

    A label -> position map is built from ``self._index``; every label in
    ``idx._data`` is then translated to a position in ``self._data``.
    ``value_is_series`` comes from the enclosing scope.

    Raises
    ------
    ValueError
        If ``self._index`` contains a repeated label, or if some label of
        ``idx`` is not present in ``self._index``.

    Returns ``self`` after the in-place assignment.
    """
    label_to_position = Dict.empty(
        key_type=types.unicode_type,
        value_type=types.int32
    )
    for pos, label in enumerate(self._index):
        if label in label_to_position:
            raise ValueError("Reindexing only valid with uniquely valued Index objects")
        label_to_position[label] = types.int32(pos)

    requested_size = len(idx._data)
    found_count = 0
    # entries for labels that are not found stay unset; the count check below
    # raises before the array is used in that case
    target_positions = numpy.empty(requested_size, dtype=types.int32)
    for k in numba.prange(requested_size):
        requested_label = idx._data[k]
        if requested_label in label_to_position:
            found_count += 1
            target_positions[k] = label_to_position[requested_label]

    if found_count != requested_size:
        raise ValueError("Reindexing not possible: idx has index not found in Series")

    payload = value._data if value_is_series == True else value  # noqa
    self._data[target_positions] = payload
    return self
752+
753+ return sdc_pandas_series_setitem_idx_str_series_align_impl
754+
755+ else : # self.index.dtype other than types.Number or types.Unicode
756+ return None
757+
758+ return None
530759
531760
532761@sdc_overload_attribute (SeriesType , 'iloc' )
0 commit comments