Skip to content

Commit 98aede1

Browse files
authored
Merge branch 'develop' into parallel_unit_tests
2 parents ef53267 + c53a731 commit 98aede1

5 files changed

Lines changed: 65 additions & 18 deletions

File tree

.travis.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ env:
1515
- TEST_DIR=/tmp/test_dir/
1616
- MODULE=openml
1717
matrix:
18-
- DISTRIB="conda" PYTHON_VERSION="2.7" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.21" SKLEARN_VERSION="0.18.1"
19-
- DISTRIB="conda" PYTHON_VERSION="3.4" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18.1"
20-
- DISTRIB="conda" PYTHON_VERSION="3.5" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18.1"
21-
- DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.25.2" SKLEARN_VERSION="0.18.1"
18+
- DISTRIB="conda" PYTHON_VERSION="2.7" SKLEARN_VERSION="0.18.2"
19+
- DISTRIB="conda" PYTHON_VERSION="3.4" SKLEARN_VERSION="0.18.2"
20+
- DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.18.2"
21+
- DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="true" SKLEARN_VERSION="0.18.2"
2222

2323
install: source ci_scripts/install.sh
2424
script: bash ci_scripts/test.sh

ci_scripts/install.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@ popd
2424

2525
# Configure the conda environment and put it in the path using the
2626
# provided versions
27-
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
28-
numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION \
29-
scikit-learn=$SKLEARN_VERSION pandas
27+
conda create -n testenv --yes python=$PYTHON_VERSION pip
3028
source activate testenv
29+
pip install nose numpy scipy cython scikit-learn==$SKLEARN_VERSION pandas \
30+
matplotlib jupyter notebook nbconvert nbformat jupyter_client ipython \
31+
ipykernel
3132

3233
pip install oslo.concurrency
34+
3335
pip install matplotlib jupyter notebook nbconvert nbformat jupyter_client ipython ipykernel
3436
if [[ "$COVERAGE" == "true" ]]; then
3537
pip install codecov

openml/runs/functions.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,18 @@ def _prediction_to_row(rep_no, fold_no, sample_no, row_id, correct_label,
361361

362362
# JvR: why is class labels a parameter? could be removed and taken from task object, right?
363363
def _run_task_get_arffcontent(model, task, class_labels):
364+
365+
def _prediction_to_probabilities(y, model_classes):
366+
# y: list or numpy array of predictions
367+
# model_classes: sklearn classifier mapping from original array id to prediction index id
368+
if not isinstance(model_classes, list):
369+
raise ValueError('please convert model classes to list prior to calling this fn')
370+
result = np.zeros((len(y), len(model_classes)), dtype=np.float32)
371+
for obs, prediction_idx in enumerate(y):
372+
array_idx = model_classes.index(prediction_idx)
373+
result[obs][array_idx] = 1.0
374+
return result
375+
364376
X, Y = task.get_X_and_y()
365377
arff_datacontent = []
366378
arff_tracecontent = []
@@ -428,8 +440,11 @@ def _run_task_get_arffcontent(model, task, class_labels):
428440
if can_measure_runtime:
429441
modelpredict_starttime = time.process_time()
430442

431-
ProbaY = model_fold.predict_proba(testX)
432443
PredY = model_fold.predict(testX)
444+
try:
445+
ProbaY = model_fold.predict_proba(testX)
446+
except AttributeError:
447+
ProbaY = _prediction_to_probabilities(PredY, list(model_classes))
433448

434449
# add client-side calculated metrics. These might be used on the server as consistency check
435450
def _calculate_local_measure(sklearn_fn, openml_name):
@@ -781,7 +796,7 @@ def _get_cached_run(run_id):
781796

782797

783798
def list_runs(offset=None, size=None, id=None, task=None, setup=None,
784-
flow=None, uploader=None, tag=None):
799+
flow=None, uploader=None, tag=None, display_errors=False):
785800
"""List all runs matching all of the given filters.
786801
787802
Perform API call `/run/list/{filters} <https://www.openml.org/api_docs/#!/run/get_run_list_filters>`_
@@ -805,6 +820,9 @@ def list_runs(offset=None, size=None, id=None, task=None, setup=None,
805820
806821
tag : str, optional
807822
823+
display_errors : bool, optional (default=False)
824+
Whether to list runs which have an error (for example a missing
825+
prediction file).
808826
Returns
809827
-------
810828
list
@@ -828,6 +846,8 @@ def list_runs(offset=None, size=None, id=None, task=None, setup=None,
828846
api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
829847
if tag is not None:
830848
api_call += "/tag/%s" % tag
849+
if display_errors:
850+
api_call += "/show_errors/true"
831851

832852
return _list_runs(api_call)
833853

openml/runs/run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,14 @@ def get_metric_fn(self, sklearn_fn, kwargs={}):
125125
scores : list
126126
a list of floats, of length num_folds * num_repeats
127127
'''
128-
if self.data_content is not None:
128+
if self.data_content is not None and self.task_id is not None:
129129
predictions_arff = self._generate_arff_dict()
130130
elif 'predictions' in self.output_files:
131131
predictions_file_url = _file_id_to_url(self.output_files['predictions'], 'predictions.arff')
132132
predictions_arff = arff.loads(openml._api_calls._read_url(predictions_file_url))
133133
# TODO: make this a stream reader
134134
else:
135-
raise ValueError('Run should have been locally executed.')
135+
raise ValueError('Run should have been locally executed or contain outputfile reference.')
136136

137137
attribute_names = [att[0] for att in predictions_arff['attributes']]
138138
if 'correct' not in attribute_names:

tests/test_runs/test_run_functions.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,21 @@
2727
from sklearn.linear_model import LogisticRegression, SGDClassifier, \
2828
LinearRegression
2929
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
30-
from sklearn.svm import SVC
30+
from sklearn.svm import SVC, LinearSVC
3131
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \
3232
StratifiedKFold
3333
from sklearn.pipeline import Pipeline
3434

3535

36+
class HardNaiveBayes(GaussianNB):
37+
# class for testing a naive bayes classifier that does not allow soft predictions
38+
def __init__(self, priors=None):
39+
super(HardNaiveBayes, self).__init__(priors)
40+
41+
def predict_proba(*args, **kwargs):
42+
raise AttributeError('predict_proba is not available when probability=False')
43+
44+
3645
class TestRun(TestBase):
3746
_multiprocess_can_split_ = True
3847

@@ -710,12 +719,6 @@ def test__run_task_get_arffcontent(self):
710719
num_folds = 10
711720
num_repeats = 1
712721

713-
clf = SGDClassifier(loss='hinge', random_state=1)
714-
self.assertRaisesRegexp(AttributeError,
715-
"probability estimates are not available for loss='hinge'",
716-
openml.runs.functions._run_task_get_arffcontent,
717-
clf, task, class_labels)
718-
719722
clf = SGDClassifier(loss='log', random_state=1)
720723
res = openml.runs.functions._run_task_get_arffcontent(clf, task, class_labels)
721724
arff_datacontent, arff_tracecontent, _, fold_evaluations, sample_evaluations = res
@@ -901,3 +904,25 @@ def test_run_on_dataset_with_missing_labels(self):
901904
# repeat, fold, row_id, 6 confidences, prediction and correct label
902905
self.assertEqual(len(row), 12)
903906

907+
def test_predict_proba_hardclassifier(self):
908+
# task 1 (test server) is important, as it is a task with an unused class
909+
tasks = [1, 3, 115]
910+
911+
for task_id in tasks:
912+
task = openml.tasks.get_task(task_id)
913+
clf1 = sklearn.pipeline.Pipeline(steps=[
914+
('imputer', sklearn.preprocessing.Imputer()), ('estimator', GaussianNB())
915+
])
916+
clf2 = sklearn.pipeline.Pipeline(steps=[
917+
('imputer', sklearn.preprocessing.Imputer()), ('estimator', HardNaiveBayes())
918+
])
919+
920+
arff_content1, arff_header1, _, _, _ = _run_task_get_arffcontent(clf1, task, task.class_labels)
921+
arff_content2, arff_header2, _, _, _ = _run_task_get_arffcontent(clf2, task, task.class_labels)
922+
923+
# verifies last two arff indices (predict and correct)
924+
# TODO: programmatically check whether these are indeed features (predict, correct)
925+
predictionsA = np.array(arff_content1)[:, -2:]
926+
predictionsB = np.array(arff_content2)[:, -2:]
927+
928+
np.testing.assert_array_equal(predictionsA, predictionsB)

0 commit comments

Comments
 (0)