Merge pull request #318 from openml/fix174

janvanrijn · web-flow · commit 59e5a372012b · 2017-10-05T13:13:06.000+02:00
Fix174
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -361,6 +361,18 @@ def _prediction_to_row(rep_no, fold_no, sample_no, row_id, correct_label,
 
 # JvR: why is class labels a parameter? could be removed and taken from task object, right?
 def _run_task_get_arffcontent(model, task, class_labels):
+
+    def _prediction_to_probabilities(y, model_classes):
+        # y: list or numpy array of predictions
+        # model_classes: list of the class labels known to the sklearn classifier; a label's position in this list is its column index in the result
+        if not isinstance(model_classes, list):
+            raise ValueError('please convert model classes to list prior to calling this fn')
+        result = np.zeros((len(y), len(model_classes)), dtype=np.float32)
+        for obs, prediction_idx in enumerate(y):
+            array_idx = model_classes.index(prediction_idx)
+            result[obs][array_idx] = 1.0
+        return result
+
     X, Y = task.get_X_and_y()
     arff_datacontent = []
     arff_tracecontent = []
@@ -428,8 +440,11 @@ def _run_task_get_arffcontent(model, task, class_labels):
                 if can_measure_runtime:
                     modelpredict_starttime = time.process_time()
 
-                ProbaY = model_fold.predict_proba(testX)
                 PredY = model_fold.predict(testX)
+                try:
+                    ProbaY = model_fold.predict_proba(testX)
+                except AttributeError:
+                    ProbaY = _prediction_to_probabilities(PredY, list(model_classes))
 
                 # add client-side calculated metrics. These might be used on the server as consistency check
                 def _calculate_local_measure(sklearn_fn, openml_name):
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
@@ -27,12 +27,21 @@
 from sklearn.linear_model import LogisticRegression, SGDClassifier, \
     LinearRegression
 from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
-from sklearn.svm import SVC
+from sklearn.svm import SVC, LinearSVC
 from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \
     StratifiedKFold
 from sklearn.pipeline import Pipeline
 
 
+class HardNaiveBayes(GaussianNB):
+    # class for testing a naive bayes classifier that does not allow soft predictions
+    def __init__(self, priors=None):
+        super(HardNaiveBayes, self).__init__(priors)
+
+    def predict_proba(*args, **kwargs):
+        raise AttributeError('predict_proba is not available when  probability=False')
+
+
 class TestRun(TestBase):
 
     def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
@@ -707,12 +716,6 @@ def test__run_task_get_arffcontent(self):
         num_folds = 10
         num_repeats = 1
 
-        clf = SGDClassifier(loss='hinge', random_state=1)
-        self.assertRaisesRegexp(AttributeError,
-                                "probability estimates are not available for loss='hinge'",
-                                openml.runs.functions._run_task_get_arffcontent,
-                                clf, task, class_labels)
-
         clf = SGDClassifier(loss='log', random_state=1)
         res = openml.runs.functions._run_task_get_arffcontent(clf, task, class_labels)
         arff_datacontent, arff_tracecontent, _, fold_evaluations, sample_evaluations = res
@@ -898,3 +901,25 @@ def test_run_on_dataset_with_missing_labels(self):
             # repeat, fold, row_id, 6 confidences, prediction and correct label
             self.assertEqual(len(row), 12)
 
+    def test_predict_proba_hardclassifier(self):
+        # task 1 (test server) is important, as it is a task with an unused class
+        tasks = [1, 3, 115]
+
+        for task_id in tasks:
+            task = openml.tasks.get_task(task_id)
+            clf1 = sklearn.pipeline.Pipeline(steps=[
+                ('imputer', sklearn.preprocessing.Imputer()), ('estimator', GaussianNB())
+            ])
+            clf2 = sklearn.pipeline.Pipeline(steps=[
+                ('imputer', sklearn.preprocessing.Imputer()), ('estimator', HardNaiveBayes())
+            ])
+
+            arff_content1, arff_header1, _, _, _ = _run_task_get_arffcontent(clf1, task, task.class_labels)
+            arff_content2, arff_header2, _, _, _ = _run_task_get_arffcontent(clf2, task, task.class_labels)
+
+            # verifies last two arff indices (predict and correct)
+            # TODO: programmatically check whether these are indeed features (predict, correct)
+            predictionsA = np.array(arff_content1)[:, -2:]
+            predictionsB = np.array(arff_content2)[:, -2:]
+
+            np.testing.assert_array_equal(predictionsA, predictionsB)