Skip to content

Commit 32026b4

Browse files
authored
Merge pull request #195 from openml/issue-177
fix issue #177 by changing the order of executing run
2 parents 9d56347 + 0aa1bc7 commit 32026b4

3 files changed

Lines changed: 50 additions & 19 deletions

File tree

openml/runs/functions.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,6 @@ def run_task(task, model):
3939
# TODO move this into its own module. While it somehow belongs here, it
4040
# adds quite a lot of functionality which is better suited in other places!
4141
# TODO why doesn't this accept a flow as input? - this would make this more flexible!
42-
flow = sklearn_to_flow(model)
43-
flow_id = flow._ensure_flow_exists()
44-
if (flow_id < 0):
45-
print("No flow")
46-
return 0, 2
47-
config.logger.info(flow_id)
48-
49-
arff_datacontent = []
5042

5143
dataset = task.get_dataset()
5244
X, Y = dataset.get_data(target=task.target_name)
@@ -56,10 +48,21 @@ def run_task(task, model):
5648
raise ValueError('The task has no class labels. This method currently '
5749
'only works for tasks with class labels.')
5850

59-
run = OpenMLRun(task_id=task.task_id, flow_id=flow_id,
60-
dataset_id=dataset.dataset_id, model=model)
51+
# execute the run
52+
run = OpenMLRun(task_id=task.task_id, flow_id=None, dataset_id=dataset.dataset_id, model=model)
6153
run.data_content, run.trace_content = _run_task_get_arffcontent(model, task, class_labels)
6254

55+
# now generate the flow
56+
flow = sklearn_to_flow(model)
57+
flow_id = flow._ensure_flow_exists()
58+
if flow_id < 0:
59+
print("No flow")
60+
return 0, 2
61+
config.logger.info(flow_id)
62+
63+
# attach the flow to the run
64+
run.flow_id = flow_id
65+
6366
return run
6467

6568

openml/runs/run.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ def publish(self):
133133
"""
134134
if self.model is None:
135135
raise PyOpenMLError("OpenMLRun obj does not contain a model. (This should never happen.) ");
136+
if self.flow_id is None:
137+
raise PyOpenMLError("OpenMLRun obj does not contain a flow id. (Should have been added while executing the task.) ");
138+
136139

137140
predictions = arff.dumps(self._generate_arff_dict())
138141
description_xml = self._create_description_xml()

tests/test_runs/test_run_functions.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
1-
from sklearn.linear_model import LogisticRegression, SGDClassifier
1+
import sys
2+
3+
from sklearn.linear_model import LogisticRegression, SGDClassifier, LinearRegression
24
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
35
from sklearn.svm import SVC
4-
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
6+
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, StratifiedKFold
57
import openml
68
import openml.exceptions
79
from openml.testing import TestBase
810

11+
if sys.version_info[0] >= 3:
12+
from unittest import mock
13+
else:
14+
import mock
15+
916

1017
class TestRun(TestBase):
1118

@@ -20,6 +27,23 @@ def _perform_run(self, task_id, num_instances, clf):
2027
self.assertEqual(len(run.data_content), num_instances)
2128
return run
2229

30+
def test_run_regression_on_classif_task(self):
31+
task_id = 10107
32+
33+
clf = LinearRegression()
34+
task = openml.tasks.get_task(task_id)
35+
self.assertRaises(AttributeError, openml.runs.run_task, task=task, model=clf)
36+
37+
@mock.patch('openml.flows.sklearn_to_flow')
38+
def test_check_erronous_sklearn_flow_fails(self, sklearn_to_flow_mock):
39+
task_id = 10107
40+
task = openml.tasks.get_task(task_id)
41+
42+
# Invalid parameter values
43+
clf = LogisticRegression(C='abc')
44+
self.assertEqual(sklearn_to_flow_mock.call_count, 0)
45+
self.assertRaisesRegexp(ValueError, "Penalty term must be positive; got \(C='abc'\)",
46+
openml.runs.run_task, task=task, model=clf)
2347

2448
def test_run_iris(self):
2549
task_id = 10107
@@ -28,34 +52,35 @@ def test_run_iris(self):
2852
clf = LogisticRegression()
2953
self._perform_run(task_id,num_instances, clf)
3054

31-
3255
def test_run_optimize_randomforest_iris(self):
3356
task_id = 10107
3457
num_instances = 150
3558
num_folds = 10
3659
num_iterations = 5
3760

38-
clf = RandomForestClassifier(n_estimators=10)
61+
clf = RandomForestClassifier(n_estimators=5)
3962
param_dist = {"max_depth": [3, None],
4063
"max_features": [1,2,3,4],
4164
"min_samples_split": [2,3,4,5,6,7,8,9,10],
4265
"min_samples_leaf": [1,2,3,4,5,6,7,8,9,10],
4366
"bootstrap": [True, False],
4467
"criterion": ["gini", "entropy"]}
45-
random_search = RandomizedSearchCV(clf, param_dist,n_iter=num_iterations)
68+
cv = StratifiedKFold(n_splits=3)
69+
random_search = RandomizedSearchCV(clf, param_dist, cv=cv,
70+
n_iter=num_iterations)
4671

47-
run = self._perform_run(task_id,num_instances, random_search)
72+
run = self._perform_run(task_id, num_instances, random_search)
4873
self.assertEqual(len(run.trace_content), num_iterations * num_folds)
4974

5075
def test_run_optimize_bagging_iris(self):
5176
task_id = 10107
5277
num_instances = 150
5378
num_folds = 10
54-
num_iterations = 36 # (num values for C times gamma)
79+
num_iterations = 9 # (num values for C times gamma)
5580

5681
bag = BaggingClassifier(base_estimator=SVC())
57-
param_dist = {"base_estimator__C": [0.001, 0.01, 0.1, 1, 10, 100],
58-
"base_estimator__gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
82+
param_dist = {"base_estimator__C": [0.01, 0.1, 10],
83+
"base_estimator__gamma": [0.01, 0.1, 10]}
5984
grid_search = GridSearchCV(bag, param_dist)
6085

6186
run = self._perform_run(task_id, num_instances, grid_search)

0 commit comments

Comments
 (0)