|
1 | 1 | from sklearn.linear_model import LogisticRegression, SGDClassifier |
2 | | -from sklearn.ensemble import RandomForestClassifier |
3 | | -from sklearn.model_selection import RandomizedSearchCV |
| 2 | +from sklearn.ensemble import RandomForestClassifier, BaggingClassifier |
| 3 | +from sklearn.svm import SVC |
| 4 | +from sklearn.model_selection import RandomizedSearchCV, GridSearchCV |
4 | 5 | import openml |
5 | 6 | from openml.testing import TestBase |
6 | 7 |
|
7 | 8 |
|
8 | 9 | class TestRun(TestBase): |
9 | | - def test_run_iris(self): |
10 | | - task = openml.tasks.get_task(10107) |
11 | | - clf = LogisticRegression() |
| 10 | + |
| 11 | + def _perform_run(self, task_id, num_instances, clf): |
| 12 | + task = openml.tasks.get_task(task_id) |
12 | 13 | run = openml.runs.run_task(task, clf) |
13 | 14 | run_ = run.publish() |
14 | 15 | self.assertEqual(run_, run) |
15 | 16 | self.assertIsInstance(run.dataset_id, int) |
16 | 17 |
|
17 | | - def test_run_optimize_randomforest_iris(self): |
18 | | - task = openml.tasks.get_task(10107) |
19 | | - numIterations = 5 |
| 18 | + # check arff output |
| 19 | + self.assertEqual(len(run.data_content), num_instances) |
| 20 | + return run |
20 | 21 |
|
21 | 22 |
|
22 | | - clf = RandomForestClassifier(n_estimators=numIterations) |
| 23 | + def test_run_iris(self): |
| 24 | + task_id = 10107 |
| 25 | + num_instances = 150 |
23 | 26 |
|
| 27 | + clf = LogisticRegression() |
| 28 | + self._perform_run(task_id,num_instances, clf) |
| 29 | + |
| 30 | + |
| 31 | + def test_run_optimize_randomforest_iris(self): |
| 32 | + task_id = 10107 |
| 33 | + num_instances = 150 |
| 34 | + num_folds = 10 |
| 35 | + num_iterations = 5 |
| 36 | + |
| 37 | + clf = RandomForestClassifier(n_estimators=10) |
24 | 38 | param_dist = {"max_depth": [3, None], |
25 | 39 | "max_features": [1,2,3,4], |
26 | 40 | "min_samples_split": [1,2,3,4,5,6,7,8,9,10], |
27 | 41 | "min_samples_leaf": [1,2,3,4,5,6,7,8,9,10], |
28 | 42 | "bootstrap": [True, False], |
29 | 43 | "criterion": ["gini", "entropy"]} |
30 | | - random_search = RandomizedSearchCV(clf, param_dist,n_iter=20) |
| 44 | + random_search = RandomizedSearchCV(clf, param_dist,n_iter=num_iterations) |
| 45 | + |
| 46 | + run = self._perform_run(task_id,num_instances, random_search) |
| 47 | + self.assertEqual(len(run.trace_content), num_iterations * num_folds) |
| 48 | + |
| 49 | + def test_run_optimize_bagging_iris(self): |
| 50 | + task_id = 10107 |
| 51 | + num_instances = 150 |
| 52 | + num_folds = 10 |
| 53 | + num_iterations = 36 # (num values for C times gamma) |
| 54 | + |
| 55 | + task = openml.tasks.get_task(task_id) |
| 56 | + bag = BaggingClassifier(base_estimator=SVC()) |
| 57 | + param_dist = {"base_estimator__C": [0.001, 0.01, 0.1, 1, 10, 100], |
| 58 | + "base_estimator__gamma": [0.001, 0.01, 0.1, 1, 10, 100]} |
| 59 | + grid_search = GridSearchCV(bag, param_dist) |
| 60 | + |
| 61 | + run = self._perform_run(task_id, num_instances, grid_search) |
| 62 | + self.assertEqual(len(run.trace_content), num_iterations * num_folds) |
31 | 63 |
|
32 | | - run = openml.runs.run_task(task, random_search) |
33 | | - run_ = run.publish() |
34 | | - self.assertEqual(run_, run) |
35 | | - self.assertIsInstance(run.dataset_id, int) |
36 | 64 |
|
37 | 65 | def test__run_task_get_arffcontent(self): |
38 | 66 | task = openml.tasks.get_task(1939) |
|
0 commit comments