@@ -929,21 +929,46 @@ def sdc_pandas_dataframe_drop_impl(df, _func_name, args, columns):
929929 return sdc_pandas_dataframe_drop_impl (df , _func_name , args , columns )
930930
931931
932- def df_getitem_slice_idx_main_codelines (self ):
933- """Generate main code lines for df.getitem """
def df_index_codelines(self):
    """Generate code lines that bind ``res_index`` to the index of the DF.

    The returned strings are source lines meant to be placed inside a
    generated function that has ``self`` (the dataframe) in scope.

    When ``self.index`` is a ``types.NoneType`` the emitted code builds a
    positional index with ``numpy.arange`` over the length of column 0;
    otherwise it reuses ``self._index`` as is.

    Returns a new list of code lines on every call, so callers may extend it.
    """
    if not isinstance(self.index, types.NoneType):
        # the dataframe carries an explicit index: use it directly
        return [' res_index = self._index']

    # no index stored: create a default one from the length of the first column
    return [
        ' length = len(get_dataframe_data(self, 0))',
        ' _index = numpy.arange(length)',
        ' res_index = _index',
    ]
942+
943+
def df_getitem_slice_idx_main_codelines(self, idx):
    """Generate main code lines for df.getitem with idx of slice.

    The emitted code first binds ``res_index`` (see ``df_index_codelines``),
    then for every column of ``self`` reads the column data, slices both the
    data and the index with ``idx`` and wraps them into a ``pandas.Series``.
    The last emitted line returns a ``pandas.DataFrame`` built from those
    series and indexed by ``res_index[idx]``.

    ``idx`` is not inspected here; the name only appears inside the generated
    text, where it refers to the argument of the generated function.

    Returns the list of generated code lines.
    """
    code = df_index_codelines(self)
    frame_items = []

    for pos, name in enumerate(self.columns):
        series_var = f'res_data_{pos}'
        code.append(f' data_{pos} = get_dataframe_data(self, {pos})')
        code.append(
            f' {series_var} = pandas.Series('
            f'data_{pos}[idx], index=res_index[idx], name="{name}")'
        )
        # remember the dict entry used by the final DataFrame constructor
        frame_items.append(f'"{name}": {series_var}')

    frame_dict = ', '.join(frame_items)
    code.append(f' return pandas.DataFrame({{{frame_dict}}}, index=res_index[idx])')

    return code
960+
961+
962+ def df_getitem_tuple_idx_main_codelines (self , literal_idx ):
963+ """Generate main code lines for df.getitem with idx of tuple"""
964+ results = []
965+ func_lines = df_index_codelines (self )
966+ needed_cols = {col : i for i , col in enumerate (self .columns ) if col in literal_idx }
967+ for col , i in needed_cols .items ():
968+ res_data = f'res_data_{ i } '
969+ func_lines += [
970+ f' data_{ i } = get_dataframe_data(self, { i } )' ,
971+ f' { res_data } = pandas.Series(data_{ i } , index=res_index, name="{ col } ")'
947972 ]
948973 results .append ((col , res_data ))
949974
@@ -953,20 +978,20 @@ def df_getitem_slice_idx_main_codelines(self):
953978 return func_lines
954979
955980
956- def df_getitem_str_slice_codegen (self ):
981+ def df_getitem_slice_idx_codegen (self , idx ):
957982 """
958983 Example of generated implementation with provided index:
959- def _df_getitem_slice_idx_impl(self, idx):
960- res_index = self._index[idx]
984+ def _df_getitem_slice_idx_impl(self, idx):
985+ res_index = self._index
961986 data_0 = get_dataframe_data(self, 0)
962- res_data_0 = pandas.Series(data_0[idx], index=res_index, name="A")
987+ res_data_0 = pandas.Series(data_0[idx], index=res_index[idx] , name="A")
963988 data_1 = get_dataframe_data(self, 1)
964989 res_data_1 = pandas.Series(data_1[idx], index=res_index[idx], name="B")
965- return pandas.DataFrame({"A": res_data_0, "B": res_data_1}, index=res_index)
990+ return pandas.DataFrame({"A": res_data_0, "B": res_data_1}, index=res_index[idx] )
966991 """
967992 func_lines = ['def _df_getitem_slice_idx_impl(self, idx):' ]
968993 if self .columns :
969- func_lines += df_getitem_slice_idx_main_codelines (self )
994+ func_lines += df_getitem_slice_idx_main_codelines (self , idx )
970995 else :
971996 # raise KeyError if input DF is empty
972997 func_lines += [' raise KeyError' ]
@@ -978,14 +1003,52 @@ def _df_getitem_slice_idx_impl(self, idx):
9781003 return func_text , global_vars
9791004
9801005
981- def gen_df_getitem_slice_idx_impl (self ):
982- func_text , global_vars = df_getitem_str_slice_codegen (self )
def df_getitem_tuple_idx_codegen(self, idx):
    """Generate source text and globals for df.getitem with idx of tuple.

    Every element of ``idx`` must expose ``literal_value``; these values are
    the requested column names. If the DF has no columns, or any requested
    name is not among ``self.columns``, the generated function only raises
    ``KeyError``.

    Example of generated implementation with provided index:
        def _df_getitem_tuple_idx_impl(self, idx):
          res_index = self._index
          data_1 = get_dataframe_data(self, 1)
          res_data_1 = pandas.Series(data_1, index=res_index, name="B")
          data_2 = get_dataframe_data(self, 2)
          res_data_2 = pandas.Series(data_2, index=res_index, name="C")
          return pandas.DataFrame({"B": res_data_1, "C": res_data_2}, index=res_index)

    Returns a ``(func_text, global_vars)`` pair: the source of
    ``_df_getitem_tuple_idx_impl`` and the globals it has to be executed with.
    """
    requested = {col.literal_value for col in idx}
    all_present = all(name in self.columns for name in requested)

    lines = ['def _df_getitem_tuple_idx_impl(self, idx):']
    if not self.columns or not all_present:
        # raise KeyError if input DF is empty or idx is invalid
        lines.append(' raise KeyError')
    else:
        lines.extend(df_getitem_tuple_idx_main_codelines(self, requested))

    func_text = '\n'.join(lines)
    global_vars = {
        'pandas': pandas,
        'numpy': numpy,
        'get_dataframe_data': get_dataframe_data,
    }

    return func_text, global_vars
1032+
1033+
def gen_df_getitem_impl_generator(codegen, impl_name):
    """Generate generator of df.getitem implementations.

    ``codegen`` is called as ``codegen(self, idx)`` and must return a
    ``(func_text, global_vars)`` pair. ``impl_name`` is the name of the
    function defined by ``func_text``.

    Returns a callable ``(self, idx)`` that executes the generated source and
    hands back the function object named ``impl_name``.
    """
    def _df_getitem_impl_generator(self, idx):
        source, namespace = codegen(self, idx)

        # definitions produced by the generated source land in this dict
        defined = {}
        exec(source, namespace, defined)

        return defined[impl_name]

    return _df_getitem_impl_generator
1046+
1047+
# Implementation generators for df.getitem: one for idx of slice, one for idx of tuple.
# Each pairs a code generator with the name of the function its source defines.
gen_df_getitem_slice_idx_impl = gen_df_getitem_impl_generator(
    codegen=df_getitem_slice_idx_codegen,
    impl_name='_df_getitem_slice_idx_impl',
)
gen_df_getitem_tuple_idx_impl = gen_df_getitem_impl_generator(
    codegen=df_getitem_tuple_idx_codegen,
    impl_name='_df_getitem_tuple_idx_impl',
)
9891052
9901053
9911054@sdc_overload (operator .getitem )
@@ -1018,8 +1081,11 @@ def _df_getitem_unicode_idx_impl(self, idx):
10181081
10191082 return _df_getitem_unicode_idx_impl
10201083
1084+ if isinstance (idx , types .Tuple ):
1085+ return gen_df_getitem_tuple_idx_impl (self , idx )
1086+
10211087 if isinstance (idx , types .SliceType ):
1022- return gen_df_getitem_slice_idx_impl (self )
1088+ return gen_df_getitem_slice_idx_impl (self , idx )
10231089
10241090 ty_checker = TypeChecker ('Operator getitem().' )
10251091 ty_checker .raise_exc (idx , 'str' , 'idx' )
0 commit comments