Incorporate Pieter's feedback

mfeurer · mfeurer · commit 2d2d3edcd466 · 2019-04-18T12:12:19.000+02:00
diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py
@@ -155,7 +155,6 @@ def _run_model_on_fold(
         fold_no: int,
         y_train: Optional[np.ndarray] = None,
         X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix]] = None,
-        classes: Optional[List] = None,
     ) -> Tuple[np.ndarray, np.ndarray, 'OrderedDict[str, float]', Optional['OpenMLRunTrace']]:
         """Run a model on a repeat,fold,subsample triplet of the task and return prediction information.
 
@@ -179,9 +178,6 @@ def _run_model_on_fold(
             indices to the potential classes specified by dataset.
         X_test : Optional, array-like (default=None)
             Test attributes to test for generalization in supervised tasks.
-        classes : List
-            List of classes for supervised classification tasks (and supervised data stream
-            classification).
 
         Returns
         -------
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
@@ -1103,7 +1103,6 @@ def _run_model_on_fold(
         fold_no: int,
         y_train: Optional[np.ndarray] = None,
         X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame]] = None,
-        classes: Optional[List] = None,
     ) -> Tuple[np.ndarray, np.ndarray, 'OrderedDict[str, float]', Optional[OpenMLRunTrace]]:
         """Run a model on a repeat,fold,subsample triplet of the task and return prediction
         information.
@@ -1134,9 +1133,6 @@ def _run_model_on_fold(
             indices to the potential classes specified by dataset.
         X_test : Optional, array-like (default=None)
             Test attributes to test for generalization in supervised tasks.
-        classes : List
-            List of classes for supervised classification tasks (and supervised data stream
-            classification).
 
         Returns
         -------
@@ -1183,6 +1179,12 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra
                 result[obs][prediction_idx] = 1.0
             return result
 
+        if isinstance(task, OpenMLSupervisedTask):
+            if y_train is None:
+                raise TypeError('argument y_train must not be of type None')
+            if X_test is None:
+                raise TypeError('argument X_test must not be of type None')
+
         # TODO: if possible, give a warning if model is already fitted (acceptable
         # in case of custom experimentation,
         # but not desirable if we want to upload to OpenML).
@@ -1259,21 +1261,18 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra
 
         if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
 
-            if classes is None:
-                raise TypeError("Argument classes must not be of type 'None'")
-
             try:
                 proba_y = model_copy.predict_proba(X_test)
             except AttributeError:
-                proba_y = _prediction_to_probabilities(pred_y, list(classes))
+                proba_y = _prediction_to_probabilities(pred_y, list(task.class_labels))
 
-            if proba_y.shape[1] != len(classes):
+            if proba_y.shape[1] != len(task.class_labels):
                 # Remap the probabilities in case there was a class missing at training time
                 # By default, the classification targets are mapped to be zero-based indices to the
                 # actual classes. Therefore, the model_classes contain the correct indices to the
                 # correct probability array (the actually array might be incorrect if there are
                 # some classes not present during train time).
-                proba_y_new = np.zeros((proba_y.shape[0], len(classes)))
+                proba_y_new = np.zeros((proba_y.shape[0], len(task.class_labels)))
                 for idx, model_class in enumerate(model_classes):
                     proba_y_new[:, model_class] = proba_y[:, idx]
                 proba_y = proba_y_new
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -440,7 +440,6 @@ def _run_task_get_arffcontent(
                     rep_no=rep_no,
                     fold_no=fold_no,
                     X_test=test_x,
-                    classes=classes,
                 )
 
                 arff_datacontent_fold = []  # type: List[List]
@@ -516,7 +515,7 @@ def _calculate_local_measure(sklearn_fn, openml_name):
     if len(traces) > 0:
         if len(traces) != n_fit:
             raise ValueError(
-                'Did not find enough traces (expected %d, found %d)' % (n_fit, len(traces))
+                'Did not find enough traces (expected {}, found {})'.format(n_fit, len(traces))
             )
         else:
             trace = OpenMLRunTrace.merge_traces(traces)
diff --git a/openml/runs/trace.py b/openml/runs/trace.py
@@ -349,7 +349,7 @@ def trace_from_xml(cls, xml):
         return cls(run_id, trace)
 
     @classmethod
-    def merge_traces(cls, traces: List['OpenMLRunTrace']):
+    def merge_traces(cls, traces: List['OpenMLRunTrace']) -> 'OpenMLRunTrace':
         for i in range(1, len(traces)):
             if traces[i] != traces[i - 1]:
                 raise ValueError('Cannot merge traces!')
@@ -363,7 +363,7 @@ def merge_traces(cls, traces: List['OpenMLRunTrace']):
         return cls(None, merged_trace)
 
     def __str__(self):
-        return '[Run id: %d, %d trace iterations]' % (
+        return '[Run id: {}, {} trace iterations]'.format(
             -1 if self.run_id is None else self.run_id,
             len(self.trace_iterations),
         )
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
@@ -1,5 +1,10 @@
 import io
 import os
+from typing import Union
+
+import numpy as np
+import pandas as pd
+import scipy.sparse
 
 from .. import datasets
 from .split import OpenMLSplit
@@ -108,7 +113,10 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,
         self.target_name = target_name
         self.split = None
 
-    def get_X_and_y(self, dataset_format='array'):
+    def get_X_and_y(
+        self,
+        dataset_format: str = 'array',
+    ) -> Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix]:
         """Get data associated with the current task.
 
         Returns
@@ -177,7 +185,10 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,
         )
         self.number_of_clusters = number_of_clusters
 
-    def get_X(self, dataset_format='array'):
+    def get_X(
+        self,
+        dataset_format: str = 'array',
+    ) -> Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix]:
         """Get data associated with the current task.
 
         Returns
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -1299,7 +1299,6 @@ def test_run_model_on_fold_classification_1(self):
             X_train=X_train,
             y_train=y_train,
             X_test=X_test,
-            classes=task.class_labels,
         )
 
         y_hat, y_hat_proba, user_defined_measures, trace = res
@@ -1355,7 +1354,6 @@ def test_run_model_on_fold_classification_2(self):
             X_train=X_train,
             y_train=y_train,
             X_test=X_test,
-            classes=task.class_labels,
         )
 
         y_hat, y_hat_proba, user_defined_measures, trace = res
@@ -1423,7 +1421,6 @@ def predict_proba(*args, **kwargs):
                 X_test=X_test,
                 fold_no=0,
                 rep_no=0,
-                classes=task.class_labels,
             )
             pred_2, proba_2, _, _ = self.extension._run_model_on_fold(
                 model=clf2,
@@ -1433,11 +1430,24 @@ def predict_proba(*args, **kwargs):
                 X_test=X_test,
                 fold_no=0,
                 rep_no=0,
-                classes=task.class_labels,
             )
 
             # verifies that the predictions are identical
             np.testing.assert_array_equal(pred_1, pred_2)
+            np.testing.assert_array_almost_equal(np.sum(proba_1, axis=1), np.ones(X_test.shape[0]))
+            # Test that there are predictions other than ones and zeros
+            print(proba_1, proba_2)
+            self.assertLess(
+                np.sum(proba_1 == 0) + np.sum(proba_1 == 1),
+                X_test.shape[0] * len(task.class_labels),
+            )
+
+            np.testing.assert_array_almost_equal(np.sum(proba_2, axis=1), np.ones(X_test.shape[0]))
+            # Test that there are only ones and zeros predicted
+            self.assertEqual(
+                np.sum(proba_2 == 0) + np.sum(proba_2 == 1),
+                X_test.shape[0] * len(task.class_labels),
+            )
 
     def test_run_model_on_fold_regression(self):
         # There aren't any regression tasks on the test server