@@ -121,7 +121,7 @@ def create_table_from_evaluations(eval_df,
121121 values : list
122122 '''
123123 if task_ids is not None :
124- eval_df = eval_df . loc [eval_df . task_id .isin (task_ids )]
124+ eval_df = eval_df [eval_df [ ' task_id' ] .isin (task_ids )]
125125 if flow_type == 'svm' :
126126 ncols = 4
127127 colnames = ['cost' , 'degree' , 'gamma' , 'kernel' ]
@@ -130,7 +130,7 @@ def create_table_from_evaluations(eval_df,
130130 colnames = ['alpha' , 'booster' , 'colsample_bylevel' , 'colsample_bytree' , 'eta' , 'lambda' ,
131131 'max_depth' , 'min_child_weight' , 'nrounds' , 'subsample' ]
132132 eval_df = eval_df .sample (frac = 1 ) # shuffling rows
133- run_ids = eval_df .run_id [:run_count ]
133+ run_ids = eval_df .loc [:, " run_id" ] [:run_count ]
134134 eval_table = pd .DataFrame (np .nan , index = run_ids , columns = colnames )
135135 values = []
136136 for run_id in run_ids :
@@ -150,31 +150,6 @@ def list_categorical_attributes(flow_type='svm'):
150150 return ['booster' ]
151151
152152
def impute_missing_values(eval_table, flow_type='svm'):
    '''Replace missing values in the evaluation table with fixed placeholders.

    Missing entries of the categorical column become the string "None";
    every other missing entry becomes -1.

    Parameters
    ----------
    eval_table : pd.DataFrame
        Table containing a 'kernel' column when flow_type is 'svm', or a
        'booster' column for any other flow_type. It is modified in place.
    flow_type : str, optional (default='svm')
        Selects which column is treated as categorical.

    Returns
    -------
    eval_table : pd.DataFrame
        The same object that was passed in, with no NaNs left.
    '''
    # Replacing NaNs with fixed values outside the range of the parameters
    # given in the supplement material of the paper
    categorical = 'kernel' if flow_type == 'svm' else 'booster'
    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the column accessor: with pandas Copy-on-Write the accessor yields
    # a temporary object, so an in-place fill would never reach eval_table
    # and the categorical column would then be filled with -1 below.
    eval_table[categorical] = eval_table[categorical].fillna("None")
    eval_table.fillna(-1, inplace=True)
    return eval_table
163-
164-
def preprocess(eval_table, flow_type='svm'):
    '''Impute missing values and one-hot encode the categorical column.

    Parameters
    ----------
    eval_table : pd.DataFrame
        Table containing a 'kernel' column when flow_type is 'svm', or a
        'booster' column for any other flow_type. It is passed through
        impute_missing_values first.
    flow_type : str, optional (default='svm')
        Selects which column is treated as categorical.

    Returns
    -------
    eval_table : np.ndarray
        Float array holding the non-categorical columns followed by the
        one-hot columns of the categorical one.
    '''
    eval_table = impute_missing_values(eval_table, flow_type)
    # The categorical column depends on the flow; encoding 'kernel'
    # unconditionally fails on tables that only carry 'booster'.
    categorical = 'kernel' if flow_type == 'svm' else 'booster'
    # Encode categorical variables as one-hot vectors
    enc = OneHotEncoder(handle_unknown='ignore')
    # The encoder expects a 2-D input, hence the single-column reshape.
    column_values = eval_table[categorical].to_numpy().reshape(-1, 1)
    one_hots = enc.fit_transform(column_values).toarray()
    # `columns=` replaces the positional axis argument, which newer pandas
    # releases no longer accept.
    numeric_part = eval_table.drop(columns=categorical)
    eval_table = np.hstack((numeric_part, one_hots)).astype(float)
    return eval_table
176-
177-
178153#############################################################################
179154# Fetching the data from OpenML
180155# *****************************
0 commit comments