|
27 | 27 | from sklearn.linear_model import LogisticRegression, SGDClassifier, \ |
28 | 28 | LinearRegression |
29 | 29 | from sklearn.ensemble import RandomForestClassifier, BaggingClassifier |
30 | | -from sklearn.svm import SVC |
| 30 | +from sklearn.svm import SVC, LinearSVC |
31 | 31 | from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, \ |
32 | 32 | StratifiedKFold |
33 | 33 | from sklearn.pipeline import Pipeline |
34 | 34 |
|
35 | 35 |
|
| 36 | +class HardNaiveBayes(GaussianNB): |
| 37 | + # Test helper: a Gaussian naive Bayes classifier that does not allow soft predictions (predict_proba raises AttributeError) |
| 38 | + def __init__(self, priors=None): |
| 39 | + super(HardNaiveBayes, self).__init__(priors) |
| 40 | + |
| 41 | + def predict_proba(*args, **kwargs): |
| 42 | + raise AttributeError('predict_proba is not available when probability=False') |
| 43 | + |
| 44 | + |
36 | 45 | class TestRun(TestBase): |
37 | 46 |
|
38 | 47 | def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): |
@@ -707,12 +716,6 @@ def test__run_task_get_arffcontent(self): |
707 | 716 | num_folds = 10 |
708 | 717 | num_repeats = 1 |
709 | 718 |
|
710 | | - clf = SGDClassifier(loss='hinge', random_state=1) |
711 | | - self.assertRaisesRegexp(AttributeError, |
712 | | - "probability estimates are not available for loss='hinge'", |
713 | | - openml.runs.functions._run_task_get_arffcontent, |
714 | | - clf, task, class_labels) |
715 | | - |
716 | 719 | clf = SGDClassifier(loss='log', random_state=1) |
717 | 720 | res = openml.runs.functions._run_task_get_arffcontent(clf, task, class_labels) |
718 | 721 | arff_datacontent, arff_tracecontent, _, fold_evaluations, sample_evaluations = res |
@@ -898,3 +901,25 @@ def test_run_on_dataset_with_missing_labels(self): |
898 | 901 | # repeat, fold, row_id, 6 confidences, prediction and correct label |
899 | 902 | self.assertEqual(len(row), 12) |
900 | 903 |
|
| 904 | + def test_predict_proba_hardclassifier(self): |
| 905 | + # task 1 (test server) is important, as it is a task with an unused class |
| 906 | + tasks = [1, 3, 115] |
| 907 | + |
| 908 | + for task_id in tasks: |
| 909 | + task = openml.tasks.get_task(task_id) |
| 910 | + clf1 = sklearn.pipeline.Pipeline(steps=[ |
| 911 | + ('imputer', sklearn.preprocessing.Imputer()), ('estimator', GaussianNB()) |
| 912 | + ]) |
| 913 | + clf2 = sklearn.pipeline.Pipeline(steps=[ |
| 914 | + ('imputer', sklearn.preprocessing.Imputer()), ('estimator', HardNaiveBayes()) |
| 915 | + ]) |
| 916 | + |
| 917 | + arff_content1, arff_header1, _, _, _ = _run_task_get_arffcontent(clf1, task, task.class_labels) |
| 918 | + arff_content2, arff_header2, _, _, _ = _run_task_get_arffcontent(clf2, task, task.class_labels) |
| 919 | + |
| 920 | + # verifies the last two arff columns (predict and correct) |
| 921 | + # TODO: programmatically check whether these are indeed the features (predict, correct) |
| 922 | + predictionsA = np.array(arff_content1)[:, -2:] |
| 923 | + predictionsB = np.array(arff_content2)[:, -2:] |
| 924 | + |
| 925 | + np.testing.assert_array_equal(predictionsA, predictionsB) |
0 commit comments