Skip to content

Commit b250547

Browse files
committed
Structured test cases
1 parent 0f58378 commit b250547

2 files changed

Lines changed: 42 additions & 15 deletions

File tree

openml/runs/run.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ def _generate_trace_arff_dict(self, model):
120120
arff_dict['data'] = self.trace_content
121121
arff_dict['relation'] = 'openml_task_' + str(self.task_id) + '_predictions'
122122

123-
print(arff_dict)
124123
return arff_dict
125124

126125
def publish(self):

tests/runs/test_run_functions.py

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,66 @@
11
from sklearn.linear_model import LogisticRegression, SGDClassifier
2-
from sklearn.ensemble import RandomForestClassifier
3-
from sklearn.model_selection import RandomizedSearchCV
2+
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
3+
from sklearn.svm import SVC
4+
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
45
import openml
56
from openml.testing import TestBase
67

78

89
class TestRun(TestBase):
9-
def test_run_iris(self):
10-
task = openml.tasks.get_task(10107)
11-
clf = LogisticRegression()
10+
11+
def _perform_run(self, task_id, num_instances, clf):
12+
task = openml.tasks.get_task(task_id)
1213
run = openml.runs.run_task(task, clf)
1314
run_ = run.publish()
1415
self.assertEqual(run_, run)
1516
self.assertIsInstance(run.dataset_id, int)
1617

17-
def test_run_optimize_randomforest_iris(self):
18-
task = openml.tasks.get_task(10107)
19-
numIterations = 5
18+
# check arff output
19+
self.assertEqual(len(run.data_content), num_instances)
20+
return run
2021

2122

22-
clf = RandomForestClassifier(n_estimators=numIterations)
23+
def test_run_iris(self):
24+
task_id = 10107
25+
num_instances = 150
2326

27+
clf = LogisticRegression()
28+
self._perform_run(task_id,num_instances, clf)
29+
30+
31+
def test_run_optimize_randomforest_iris(self):
32+
task_id = 10107
33+
num_instances = 150
34+
num_folds = 10
35+
num_iterations = 5
36+
37+
clf = RandomForestClassifier(n_estimators=10)
2438
param_dist = {"max_depth": [3, None],
2539
"max_features": [1,2,3,4],
2640
"min_samples_split": [1,2,3,4,5,6,7,8,9,10],
2741
"min_samples_leaf": [1,2,3,4,5,6,7,8,9,10],
2842
"bootstrap": [True, False],
2943
"criterion": ["gini", "entropy"]}
30-
random_search = RandomizedSearchCV(clf, param_dist,n_iter=20)
44+
random_search = RandomizedSearchCV(clf, param_dist,n_iter=num_iterations)
45+
46+
run = self._perform_run(task_id,num_instances, random_search)
47+
self.assertEqual(len(run.trace_content), num_iterations * num_folds)
48+
49+
def test_run_optimize_bagging_iris(self):
50+
task_id = 10107
51+
num_instances = 150
52+
num_folds = 10
53+
num_iterations = 36 # (num values for C times gamma)
54+
55+
task = openml.tasks.get_task(task_id)
56+
bag = BaggingClassifier(base_estimator=SVC())
57+
param_dist = {"base_estimator__C": [0.001, 0.01, 0.1, 1, 10, 100],
58+
"base_estimator__gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
59+
grid_search = GridSearchCV(bag, param_dist)
60+
61+
run = self._perform_run(task_id, num_instances, grid_search)
62+
self.assertEqual(len(run.trace_content), num_iterations * num_folds)
3163

32-
run = openml.runs.run_task(task, random_search)
33-
run_ = run.publish()
34-
self.assertEqual(run_, run)
35-
self.assertIsInstance(run.dataset_id, int)
3664

3765
def test__run_task_get_arffcontent(self):
3866
task = openml.tasks.get_task(1939)

0 commit comments

Comments
 (0)