1616from ..util import URLError , version_complies
1717from ..tasks .functions import _create_task_from_xml
1818from .._api_calls import _perform_api_call
19- from .run import OpenMLRun
19+ from .run import OpenMLRun , _get_version_information
2020
2121
2222# _get_version_information, _get_dict and _create_setup_string are in run.py to avoid
2323# circular imports
2424
2525
2626
27- def run_task (task , model , avoid_duplicate_runs = True ):
27+ def run_task (task , model , avoid_duplicate_runs = True , flow_tags = None ):
2828 """Performs a CV run on the dataset of the given task, using the split.
2929
3030 Parameters
@@ -35,13 +35,16 @@ def run_task(task, model, avoid_duplicate_runs=True):
3535 a model which has a function fit(X,Y) and predict(X),
3636 all supervised estimators of scikit learn follow this definition of a model [1]
3737 [1](http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)
38-
38+ flow_tags : list(str)
39+ a list of tags that the flow should have at creation
3940
4041 Returns
4142 -------
4243 run : OpenMLRun
4344 Result of the run.
4445 """
46+ if flow_tags is not None and not isinstance (flow_tags , list ):
47+ raise ValueError ("flow_tags should be list" )
4548 # TODO move this into its own module. While it somehow belongs here, it
4649 # adds quite a lot of functionality which is better suited in other places!
4750 # TODO why doesn't this accept a flow as input? - this would make this more flexible!
@@ -66,8 +69,10 @@ def run_task(task, model, avoid_duplicate_runs=True):
6669 raise ValueError ('The task has no class labels. This method currently '
6770 'only works for tasks with class labels.' )
6871
72+ run_environment = _get_version_information ()
73+ tags = ['openml-python' , run_environment [1 ]]
6974 # execute the run
70- run = OpenMLRun (task_id = task .task_id , flow_id = None , dataset_id = dataset .dataset_id , model = model )
75+ run = OpenMLRun (task_id = task .task_id , flow_id = None , dataset_id = dataset .dataset_id , model = model , tags = tags )
7176 run .data_content , run .trace_content , run .trace_attributes = _run_task_get_arffcontent (model , task , class_labels )
7277
7378 if flow_id == False :
@@ -176,18 +181,20 @@ def _run_task_get_arffcontent(model, task, class_labels):
176181 if version_complies (3 , 3 ):
177182 modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
178183 user_defined_measures ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
179-
180- if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
181- arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
182- model_classes = model_fold .best_estimator_ .classes_
183- else :
184- model_classes = model_fold .classes_
185184 except AttributeError as e :
186185 # typically happens when training a regressor on classification task
187186 raise PyOpenMLError (str (e ))
187+
188+ # extract trace
189+ if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
190+ arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
191+ model_classes = model_fold .best_estimator_ .classes_
192+ else :
193+ model_classes = model_fold .classes_
188194
189195 if version_complies (3 , 3 ):
190196 modelpredict_starttime = time .process_time ()
197+
191198 ProbaY = model_fold .predict_proba (testX )
192199 PredY = model_fold .predict (testX )
193200 if version_complies (3 , 3 ):
@@ -215,6 +222,12 @@ def _run_task_get_arffcontent(model, task, class_labels):
215222
216223
217224def _extract_arfftrace (model , rep_no , fold_no ):
225+ if not isinstance (model , sklearn .model_selection ._search .BaseSearchCV ):
226+ raise ValueError ('model should be instance of' \
227+ ' sklearn.model_selection._search.BaseSearchCV' )
228+ if not hasattr (model , 'cv_results_' ):
229+ raise ValueError ('model should contain `cv_results_`' )
230+
218231 arff_tracecontent = []
219232 for itt_no in range (0 , len (model .cv_results_ ['mean_test_score' ])):
220233 # we use the string values for True and False, as it is defined in this way by the OpenML server
@@ -230,6 +243,12 @@ def _extract_arfftrace(model, rep_no, fold_no):
230243 return arff_tracecontent
231244
232245def _extract_arfftrace_attributes (model ):
246+ if not isinstance (model , sklearn .model_selection ._search .BaseSearchCV ):
247+ raise ValueError ('model should be instance of' \
248+ ' sklearn.model_selection._search.BaseSearchCV' )
249+ if not hasattr (model , 'cv_results_' ):
250+ raise ValueError ('model should contain `cv_results_`' )
251+
233252 # attributes that will be in trace arff, regardless of the model
234253 trace_attributes = [('repeat' , 'NUMERIC' ),
235254 ('fold' , 'NUMERIC' ),
@@ -391,6 +410,15 @@ def _create_run_from_xml(xml):
391410 evaluation_flows [key ] = flow_id
392411
393412 evaluation_flows [key ] = flow_id
413+ tags = None
414+ if 'oml:tag' in run :
415+ if isinstance (run ['oml:tag' ], str ):
416+ tags = [run ['oml:tag' ]]
417+ elif isinstance (run ['oml:tag' ], list ):
418+ tags = run ['oml:tag' ]
419+ else :
420+ raise ValueError ('Received not string and non list as tag item' )
421+
394422
395423 return OpenMLRun (run_id = run_id , uploader = uploader ,
396424 uploader_name = uploader_name , task_id = task_id ,
@@ -401,7 +429,7 @@ def _create_run_from_xml(xml):
401429 parameter_settings = parameters ,
402430 dataset_id = dataset_id , predictions_url = predictions_url ,
403431 evaluations = evaluations ,
404- detailed_evaluations = detailed_evaluations )
432+ detailed_evaluations = detailed_evaluations , tags = tags )
405433
406434
407435def _get_cached_run (run_id ):
0 commit comments