Skip to content

Commit a6d48b3

Browse files
committed
fix issue #177 by changing the order of executing a run (first build the model, generate predictions, then check the OpenML flow id)
1 parent e42ca51 commit a6d48b3

3 files changed

Lines changed: 27 additions & 15 deletions

File tree

openml/runs/functions.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,6 @@ def run_task(task, model):
3939
# TODO move this into its own module. While it somehow belongs here, it
4040
# adds quite a lot of functionality which is better suited in other places!
4141
# TODO why doesn't this accept a flow as input? - this would make this more flexible!
42-
flow = sklearn_to_flow(model)
43-
flow_id = flow._ensure_flow_exists()
44-
if (flow_id < 0):
45-
print("No flow")
46-
return 0, 2
47-
config.logger.info(flow_id)
48-
49-
arff_datacontent = []
5042

5143
dataset = task.get_dataset()
5244
X, Y = dataset.get_data(target=task.target_name)
@@ -56,10 +48,21 @@ def run_task(task, model):
5648
raise ValueError('The task has no class labels. This method currently '
5749
'only works for tasks with class labels.')
5850

59-
run = OpenMLRun(task_id=task.task_id, flow_id=flow_id,
60-
dataset_id=dataset.dataset_id, model=model)
51+
# execute the run
52+
run = OpenMLRun(task_id=task.task_id, flow_id=None, dataset_id=dataset.dataset_id, model=model)
6153
run.data_content, run.trace_content = _run_task_get_arffcontent(model, task, class_labels)
6254

55+
# now generate the flow
56+
flow = sklearn_to_flow(model)
57+
flow_id = flow._ensure_flow_exists()
58+
if flow_id < 0:
59+
print("No flow")
60+
return 0, 2
61+
config.logger.info(flow_id)
62+
63+
# attach the flow to the run
64+
run.flow_id = flow_id
65+
6366
return run
6467

6568

openml/runs/run.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ def publish(self):
133133
"""
134134
if self.model is None:
135135
raise PyOpenMLError("OpenMLRun obj does not contain a model. (This should never happen.) ");
136+
if self.flow_id is None:
137+
raise PyOpenMLError("OpenMLRun obj does not contain a flow id. (Should have been added while executing the task.) ");
138+
136139

137140
predictions = arff.dumps(self._generate_arff_dict())
138141
description_xml = self._create_description_xml()

tests/test_runs/test_run_functions.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from sklearn.linear_model import LogisticRegression, SGDClassifier
1+
from sklearn.linear_model import LogisticRegression, SGDClassifier, LinearRegression
22
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
33
from sklearn.svm import SVC
44
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
@@ -20,6 +20,12 @@ def _perform_run(self, task_id, num_instances, clf):
2020
self.assertEqual(len(run.data_content), num_instances)
2121
return run
2222

23+
def test_run_regression_on_classif_task(self):
24+
task_id = 10107
25+
26+
clf = LinearRegression()
27+
task = openml.tasks.get_task(task_id)
28+
self.assertRaises(AttributeError, openml.runs.run_task, task=task, model=clf)
2329

2430
def test_run_iris(self):
2531
task_id = 10107
@@ -44,19 +50,19 @@ def test_run_optimize_randomforest_iris(self):
4450
"criterion": ["gini", "entropy"]}
4551
random_search = RandomizedSearchCV(clf, param_dist,n_iter=num_iterations)
4652

47-
run = self._perform_run(task_id,num_instances, random_search)
53+
run = self._perform_run(task_id, num_instances, random_search)
4854
self.assertEqual(len(run.trace_content), num_iterations * num_folds)
4955

5056
def test_run_optimize_bagging_iris(self):
5157
task_id = 10107
5258
num_instances = 150
5359
num_folds = 10
54-
num_iterations = 36 # (num values for C times gamma)
60+
num_iterations = 16 # (num values for C times gamma)
5561

5662
task = openml.tasks.get_task(task_id)
5763
bag = BaggingClassifier(base_estimator=SVC())
58-
param_dist = {"base_estimator__C": [0.001, 0.01, 0.1, 1, 10, 100],
59-
"base_estimator__gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
64+
param_dist = {"base_estimator__C": [0.01, 0.1, 1, 10],
65+
"base_estimator__gamma": [0.01, 0.1, 1, 10]}
6066
grid_search = GridSearchCV(bag, param_dist)
6167

6268
run = self._perform_run(task_id, num_instances, grid_search)

0 commit comments

Comments
 (0)