add extra tests, minor refactoring

mfeurer · mfeurer · commit deda557a1d4c · 2019-04-17T17:54:18.000+02:00
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
@@ -80,7 +80,7 @@ def _read_url_files(url, data=None, file_elements=None):
         files=file_elements,
     )
     if response.status_code != 200:
-        raise _parse_server_exception(response, url=url)
+        raise _parse_server_exception(response, url)
     if 'Content-Encoding' not in response.headers or \
             response.headers['Content-Encoding'] != 'gzip':
         warnings.warn('Received uncompressed content from OpenML for {}.'
@@ -95,7 +95,7 @@ def _read_url(url, request_method, data=None):
 
     response = send_request(request_method=request_method, url=url, data=data)
     if response.status_code != 200:
-        raise _parse_server_exception(response, url=url)
+        raise _parse_server_exception(response, url)
     if 'Content-Encoding' not in response.headers or \
             response.headers['Content-Encoding'] != 'gzip':
         warnings.warn('Received uncompressed content from OpenML for {}.'
@@ -137,15 +137,15 @@ def send_request(
     return response
 
 
-def _parse_server_exception(response, url=None):
+def _parse_server_exception(response, url):
     # OpenML has a sophisticated error system
     # where information about failures is provided. try to parse this
     try:
         server_exception = xmltodict.parse(response.text)
     except Exception:
         raise OpenMLServerError(
-            'Unexpected server error. Please contact the developers!\n'
-            'Status code: {}\n{}'.format(response.status_code, response.text))
+            'Unexpected server error when calling {}. Please contact the developers!\n'
+            'Status code: {}\n{}'.format(url, response.status_code, response.text))
 
     server_error = server_exception['oml:error']
     code = int(server_error['oml:code'])
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
@@ -1099,11 +1099,11 @@ def _run_model_on_fold(
         model: Any,
         task: 'OpenMLTask',
         X_train: Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame],
-        y_train: np.ndarray,
         rep_no: int,
         fold_no: int,
+        y_train: Optional[np.ndarray] = None,
         X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame]] = None,
-        n_classes: Optional[int] = None,
+        classes: Optional[int] = None,
     ) -> Tuple[np.ndarray, np.ndarray, 'OrderedDict[str, float]', Any]:
         """Run a model on a repeat,fold,subsample triplet of the task and return prediction
         information.
@@ -1156,7 +1156,7 @@ def _run_model_on_fold(
 
         def _prediction_to_probabilities(
                 y: np.ndarray,
-                model_classes: List,
+                classes: List,
         ) -> np.ndarray:
             """Transforms predicted probabilities to match with OpenML class indices.
 
@@ -1175,13 +1175,12 @@ def _prediction_to_probabilities(
             # y: list or numpy array of predictions
             # model_classes: sklearn classifier mapping from original array id to
             # prediction index id
-            if not isinstance(model_classes, list):
+            if not isinstance(classes, list):
                 raise ValueError('please convert model classes to list prior to '
                                  'calling this fn')
-            result = np.zeros((len(y), len(model_classes)), dtype=np.float32)
+            result = np.zeros((len(y), len(classes)), dtype=np.float32)
             for obs, prediction_idx in enumerate(y):
-                array_idx = model_classes.index(prediction_idx)
-                result[obs][array_idx] = 1.0
+                result[obs][prediction_idx] = 1.0
             return result
 
         # TODO: if possible, give a warning if model is already fitted (acceptable
@@ -1239,7 +1238,12 @@ def _prediction_to_probabilities(
 
         # In supervised learning this returns the predictions for Y, in clustering
         # it returns the clusters
-        pred_y = model_copy.predict(X_test)
+        if isinstance(task, OpenMLSupervisedTask):
+            pred_y = model_copy.predict(X_test)
+        elif isinstance(task, OpenMLClusteringTask):
+            pred_y = model_copy.predict(X_train)
+        else:
+            raise ValueError(task)
 
         if can_measure_cputime:
             modelpredict_duration_cputime = (time.process_time()
@@ -1258,13 +1262,18 @@ def _prediction_to_probabilities(
             try:
                 proba_y = model_copy.predict_proba(X_test)
             except AttributeError:
-                proba_y = _prediction_to_probabilities(pred_y, list(model_classes))
-
-            pred_y = np.array([model_classes[label] for label in pred_y], dtype=pred_y.dtype)
-            proba_y_new = np.zeros((proba_y.shape[0], n_classes))
-            for idx, class_idx in enumerate(model_classes):
-                proba_y_new[:, class_idx] = proba_y[:, idx]
-            proba_y = proba_y_new
+                proba_y = _prediction_to_probabilities(pred_y, list(classes))
+
+            if proba_y.shape[1] != len(classes):
+                # Remap the probabilities in case there was a class missing at training time.
+                # By default, the classification targets are mapped to be zero-based indices to the
+                # actual classes. Therefore, the model_classes contain the correct indices to the
+                # correct probability array (the actual array might be incorrect if there are some
+                # classes not present during train time).
+                proba_y_new = np.zeros((proba_y.shape[0], len(classes)))
+                for idx, model_class in enumerate(model_classes):
+                    proba_y_new[:, model_class] = proba_y[:, idx]
+                proba_y = proba_y_new
 
             if proba_y.shape[1] != len(task.class_labels):
                 message = "Estimator only predicted for {}/{} classes!".format(
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -395,7 +395,7 @@ def _run_task_get_arffcontent(
     # TODO use different iterator to only provide a single iterator (less
     # methods, less maintenance, less confusion)
     num_reps, num_folds, num_samples = task.get_split_dimensions()
-    n_classes = None
+    classes = None
 
     n_fit = 0
     for rep_no in range(num_reps):
@@ -406,14 +406,15 @@ def _run_task_get_arffcontent(
                 train_indices, test_indices = task.get_train_test_split_indices(
                     repeat=rep_no, fold=fold_no, sample=sample_no)
                 if isinstance(task, OpenMLSupervisedTask):
-                    x, y = task.get_X_and_y()
+                    x, y = task.get_X_and_y(dataset_format='array')
                     train_x = x[train_indices]
                     train_y = y[train_indices]
                     test_x = x[test_indices]
                     test_y = y[test_indices]
                     if isinstance(task, (OpenMLClassificationTask, OpenMLClassificationTask)):
-                        n_classes = len(task.class_labels)
+                        classes = task.class_labels
                 elif isinstance(task, OpenMLClusteringTask):
+                    x = task.get_X(dataset_format='array')
                     train_x = train_indices
                     train_y = None
                     test_x = test_indices
@@ -439,7 +440,7 @@ def _run_task_get_arffcontent(
                     rep_no=rep_no,
                     fold_no=fold_no,
                     X_test=test_x,
-                    n_classes=n_classes,
+                    classes=classes,
                 )
 
                 arff_datacontent_fold = []  # type: List[List]
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
@@ -108,7 +108,7 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,
         self.target_name = target_name
         self.split = None
 
-    def get_X_and_y(self):
+    def get_X_and_y(self, dataset_format='array'):
         """Get data associated with the current task.
 
         Returns
@@ -120,7 +120,7 @@ def get_X_and_y(self):
         if self.task_type_id not in (1, 2, 3):
             raise NotImplementedError(self.task_type)
         X_and_y = dataset.get_data(
-            dataset_format='array', target=self.target_name
+            dataset_format=dataset_format, target=self.target_name,
         )
         return X_and_y
 
@@ -177,6 +177,20 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,
         )
         self.number_of_clusters = number_of_clusters
 
+    def get_X(self, dataset_format='array'):
+        """Get data associated with the current task.
+
+        Returns
+        -------
+        X - the result of ``dataset.get_data`` called with ``target=None``
+
+        """
+        dataset = self.get_dataset()
+        X_and_y = dataset.get_data(
+            dataset_format=dataset_format, target=None,
+        )
+        return X_and_y
+
 
 class OpenMLLearningCurveTask(OpenMLClassificationTask):
     def __init__(self, task_id, task_type_id, task_type, data_set_id,
diff --git a/openml/testing.py b/openml/testing.py
@@ -144,6 +144,7 @@ def _check_fold_timing_evaluations(
         num_folds: int,
         max_time_allowed: float = 60000.0,
         task_type: int = TaskTypeEnum.SUPERVISED_CLASSIFICATION,
+        check_scores: bool = True,
     ):
         """
         Checks whether the right timing measures are attached to the run
@@ -167,10 +168,11 @@ def _check_fold_timing_evaluations(
             'wall_clock_time_millis': (0, max_time_allowed),
         }
 
-        if task_type in (TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE):
-            check_measures['predictive_accuracy'] = (0, 1.)
-        elif task_type == TaskTypeEnum.SUPERVISED_REGRESSION:
-            check_measures['mean_absolute_error'] = (0, float("inf"))
+        if check_scores:
+            if task_type in (TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE):
+                check_measures['predictive_accuracy'] = (0, 1.)
+            elif task_type == TaskTypeEnum.SUPERVISED_REGRESSION:
+                check_measures['mean_absolute_error'] = (0, float("inf"))
 
         self.assertIsInstance(fold_evaluations, dict)
         if sys.version_info[:2] >= (3, 3):
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py