1111import six
1212import xmltodict
1313
14+ import openml
1415from ..exceptions import PyOpenMLError
1516from .. import config
1617from ..flows import sklearn_to_flow , get_flow , flow_exists , _check_n_jobs , \
@@ -30,20 +31,6 @@ def run_model_on_task(task, model, avoid_duplicate_runs=True, flow_tags=None,
3031 seed = None ):
3132 flow = sklearn_to_flow (model )
3233
33- # returns flow id if the flow exists on the server, False otherwise
34- flow_id = flow_exists (flow .name , flow .external_version )
35-
36- if flow_id == False :
37- # TODO this is potential race condition! someone could upload the
38- # same flow in the meantime!
39- # means the flow did not exists. As we could run it, publish it now
40- flow = flow .publish ()
41- else :
42- # flow already existed, download it from server
43- # TODO (neccessary? is this a post condition of this function)
44- flow_from_server = get_flow (flow_id )
45- _copy_server_fields (flow_from_server , flow )
46-
4734 return run_flow_on_task (task = task , flow = flow ,
4835 avoid_duplicate_runs = avoid_duplicate_runs ,
4936 flow_tags = flow_tags , seed = seed )
@@ -82,6 +69,9 @@ def run_flow_on_task(task, flow, avoid_duplicate_runs=True, flow_tags=None,
8269 # skips the run if it already exists and the user opts for this in the config file.
8370 # also, if the flow is not present on the server, the check is not needed.
8471 if avoid_duplicate_runs :
72+ if flow .flow_id is None :
73+ raise ValueError ('Cannot check if a run exists if the '
74+ 'corresponding flow has not been published yet!' )
8575 flow_from_server = get_flow (flow .flow_id )
8676 setup_id = setup_exists (flow_from_server )
8777 ids = _run_exists (task .task_id , setup_id )
@@ -98,18 +88,43 @@ def run_flow_on_task(task, flow, avoid_duplicate_runs=True, flow_tags=None,
9888
9989 run_environment = _get_version_information ()
10090 tags = ['openml-python' , run_environment [1 ]]
91+
10192 # execute the run
93+ res = _run_task_get_arffcontent (flow .model , task , class_labels )
94+
95+ if flow .flow_id is None :
96+ _publish_flow_if_necessary (flow )
97+
10298 run = OpenMLRun (task_id = task .task_id , flow_id = flow .flow_id ,
10399 dataset_id = dataset .dataset_id , model = flow .model , tags = tags )
104100 run .parameter_settings = OpenMLRun ._parse_parameters (flow )
105- res = _run_task_get_arffcontent ( flow . model , task , class_labels )
101+
106102 run .data_content , run .trace_content , run .trace_attributes , run .detailed_evaluations = res
107103
108104 config .logger .info ('Executed Task %d with Flow id: %d' % (task .task_id , run .flow_id ))
109105
110106 return run
111107
112108
109+ def _publish_flow_if_necessary (flow ):
110+ # try publishing the flow if one has to assume it doesn't exist yet. It
111+ # might fail because it already exists, then the flow is currently not
112+ # reused
113+
114+ try :
115+ flow .publish ()
116+ except OpenMLServerException as e :
117+ if e .message == "flow already exists" :
118+ flow_id = openml .flows .flow_exists (flow .name ,
119+ flow .external_version )
120+ server_flow = get_flow (flow_id )
121+ openml .flows .flow ._copy_server_fields (server_flow , flow )
122+ openml .flows .assert_flows_equal (flow , server_flow ,
123+ ignore_parameters = True )
124+ else :
125+ raise e
126+
127+
113128def get_run_trace (run_id ):
114129 """Get the optimization trace object for a given run id.
115130
0 commit comments