22import io
33import os
44import xmltodict
5+ import numpy as np
6+ import warnings
57from sklearn .model_selection ._search import BaseSearchCV
68
79from .. import config
@@ -70,6 +72,41 @@ def run_task(task, model):
7072 return run
7173
7274
75+ def _prediction_to_row (rep_no , fold_no , row_id , correct_label , predicted_label , predicted_probabilities , class_labels , model_classes_mapping ):
76+ """Complicated util function that turns probability estimates of a classifier for a given instance into the right arff format to upload to openml.
77+
78+ Parameters
79+ ----------
80+ rep_no : int
81+ fold_no : int
82+ row_id : int
83+ row id in the initial dataset
84+ correct_label : str
85+ original label of the instance
86+ predicted_label : str
87+ the label that was predicted
88+ predicted_probabilities : array (size=num_classes)
89+ probabilities per class
90+ class_labels : array (size=num_classes)
91+
92+ Returns
93+ -------
94+ arff_line : list
95+ representation of the current prediction in OpenML format
96+ """
97+ arff_line = [rep_no , fold_no , row_id ]
98+ for class_label_idx in range (len (class_labels )):
99+ if class_label_idx in model_classes_mapping :
100+ index = np .where (model_classes_mapping == class_label_idx )[0 ][0 ] # TODO: WHY IS THIS 2D???
101+ arff_line .append (predicted_probabilities [index ])
102+ else :
103+ arff_line .append (0.0 )
104+
105+ arff_line .append (class_labels [predicted_label ])
106+ arff_line .append (correct_label )
107+ return arff_line
108+
109+ # JvR: why is class labels a parameter? could be removed and taken from task object, right?
73110def _run_task_get_arffcontent (model , task , class_labels ):
74111 X , Y = task .get_X_and_y ()
75112 arff_datacontent = []
@@ -89,18 +126,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
89126
90127 model .fit (trainX , trainY )
91128 if isinstance (model , BaseSearchCV ):
92- _add_results_to_arfftrace (arff_tracecontent , fold_no , model ,
93- rep_no )
129+ _add_results_to_arfftrace (arff_tracecontent , fold_no , model , rep_no )
94130
95131 ProbaY = model .predict_proba (testX )
96132 PredY = model .predict (testX )
133+ if ProbaY .shape [1 ] != len (class_labels ):
134+ warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
97135
98136 for i in range (0 , len (test_indices )):
99- assert (len (ProbaY [i ]) == len (class_labels )), 'Predicted probabilities and available classes do not match. (sklearn bug?) '
100- arff_line = [rep_no , fold_no , test_indices [i ]]
101- arff_line .extend (ProbaY [i ])
102- arff_line .append (class_labels [PredY [i ]])
103- arff_line .append (class_labels [testY [i ]])
137+ arff_line = _prediction_to_row (rep_no , fold_no , test_indices [i ], class_labels [testY [i ]], PredY [i ], ProbaY [i ], class_labels , model .classes_ )
104138 arff_datacontent .append (arff_line )
105139
106140 fold_no = fold_no + 1
0 commit comments