Skip to content

Commit 4dbc93c

Browse files
committed
changed order of flow creation / run execution (first serialize the flow, then run the task, then publish the flow)
removed _ensure_flow_exists function (trivial and unused)
1 parent f9d720b commit 4dbc93c

2 files changed

Lines changed: 15 additions & 27 deletions

File tree

openml/flows/flow.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -340,26 +340,6 @@ def publish(self):
340340
self.flow_id = int(xmltodict.parse(return_value)['oml:upload_flow']['oml:id'])
341341
return self
342342

343-
def _ensure_flow_exists(self):
344-
""" Checks if a flow exists for the given model and possibly creates it.
345-
346-
If the given flow exists on the server, the flow-id will simply
347-
be returned. Otherwise it will be uploaded to the server.
348-
349-
Returns
350-
-------
351-
flow_id : int
352-
Flow id on the server.
353-
"""
354-
_, flow_id = _check_flow_exists(self.name, self.external_version)
355-
# TODO add numpy and scipy version!
356-
357-
if int(flow_id) == -1:
358-
flow = self.publish()
359-
return int(flow.flow_id)
360-
361-
return int(flow_id)
362-
363343

364344
def _check_flow_exists(name, version):
365345
"""Retrieves the flow id of the flow uniquely identified by name+version.

openml/runs/functions.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import xmltodict
55
import numpy as np
66
import warnings
7+
import openml
78
from sklearn.model_selection._search import BaseSearchCV
89

910
from build.lib.openml.exceptions import PyOpenMLError
@@ -44,14 +45,13 @@ def run_task(task, model):
4445
# adds quite a lot of functionality which is better suited in other places!
4546
# TODO why doesn't this accept a flow as input? - this would make this more flexible!
4647
flow = sklearn_to_flow(model)
47-
flow_id = flow._ensure_flow_exists()
48-
if flow_id < 0:
49-
print("No flow")
50-
return 0, 2
51-
config.logger.info(flow_id)
5248

53-
if config.avoid_duplicate_runs:
54-
# TODO: would be nice if flow._ensure_flow_exists already handled this
49+
# returns flow id if the flow exists on the server, -1 otherwise
50+
_, flow_id = openml.flows._check_flow_exists(flow.name, flow.external_version)
51+
52+
# skips the run if it already exists and the user opts for this in the config file.
53+
# also, if the flow is not present on the server, the check is not needed.
54+
if config.avoid_duplicate_runs and flow_id > 0:
5555
flow = get_flow(flow_id)
5656
setup_id = setup_exists(flow, model)
5757
ids = _run_exists(task.task_id, setup_id)
@@ -75,6 +75,14 @@ def run_task(task, model):
7575
run.error_message = str(message)
7676
warnings.warn("Run terminated with error: %s" %run.error_message)
7777

78+
if flow_id < 0:
79+
flow.publish()
80+
config.logger.info(flow_id)
81+
82+
# attach the flow to the run
83+
run.flow_id = flow_id
84+
85+
7886
return run
7987

8088
def _run_exists(task_id, setup_id):

0 commit comments

Comments
 (0)