55import numpy as np
66import warnings
77import sklearn
8+ import time
89from sklearn .model_selection ._search import BaseSearchCV
910
1011from build .lib .openml .exceptions import PyOpenMLError
1112from .. import config
1213from ..flows import sklearn_to_flow , get_flow
1314from ..setups import setup_exists
1415from ..exceptions import OpenMLCacheException , OpenMLServerException
15- from ..util import URLError
16+ from ..util import URLError , version_complies
1617from ..tasks .functions import _create_task_from_xml
1718from .._api_calls import _perform_api_call
1819from .run import OpenMLRun
@@ -70,7 +71,7 @@ def run_task(task, model):
7071 run = OpenMLRun (task_id = task .task_id , flow_id = flow_id , dataset_id = dataset .dataset_id , model = model )
7172
7273 try :
73- run .data_content , run .trace_content = _run_task_get_arffcontent (model , task , class_labels )
74+ run .data_content , run .trace_content , run . detailed_evaluations = _run_task_get_arffcontent (model , task , class_labels )
7475 except PyOpenMLError as message :
7576 run .error_message = str (message )
7677 warnings .warn ("Run terminated with error: %s" % run .error_message )
@@ -141,6 +142,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
141142 X , Y = task .get_X_and_y ()
142143 arff_datacontent = []
143144 arff_tracecontent = []
145+ user_defined_measures = defaultdict (lambda : defaultdict (dict ))
144146
145147 rep_no = 0
146148 # TODO use different iterator to only provide a single iterator (less
@@ -156,8 +158,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
156158 testY = Y [test_indices ]
157159
158160 try :
161+ # for measuring runtime; time.process_time() is only available since Python 3.3
162+ if version_complies (3 , 3 ):
163+ modelfit_starttime = time .process_time ()
159164 model_fold .fit (trainX , trainY )
160165
166+ if version_complies (3 , 3 ):
167+ modelfit_duration = time .process_time () - modelfit_starttime
168+ user_defined_measures ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
169+
161170 if isinstance (model_fold , BaseSearchCV ):
162171 _add_results_to_arfftrace (arff_tracecontent , fold_no , model_fold , rep_no )
163172 model_classes = model_fold .best_estimator_ .classes_
@@ -167,8 +176,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
167176 # typically happens when training a regressor on classification task
168177 raise PyOpenMLError (str (e ))
169178
179+ if version_complies (3 , 3 ):
180+ modelpredict_starttime = time .process_time ()
170181 ProbaY = model_fold .predict_proba (testX )
171182 PredY = model_fold .predict (testX )
183+ if version_complies (3 , 3 ):
184+ modelpredict_duration = time .process_time () - modelpredict_starttime
185+ user_defined_measures ['usercpu_time_millis_testing' ][rep_no ][fold_no ] = modelpredict_duration
186+ user_defined_measures ['usercpu_time_millis' ][rep_no ][fold_no ] = modelfit_duration + modelpredict_duration
187+
172188 if ProbaY .shape [1 ] != len (class_labels ):
173189 warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
174190
@@ -182,10 +198,20 @@ def _run_task_get_arffcontent(model, task, class_labels):
182198 if not isinstance (model , BaseSearchCV ):
183199 arff_tracecontent = None
184200
185- return arff_datacontent , arff_tracecontent
201+ return arff_datacontent , arff_tracecontent , user_defined_measures
186202
187203
188204def _add_results_to_arfftrace (arff_tracecontent , fold_no , model , rep_no ):
205+ '''
206+ Extracts the various results calculated by `BaseSearchCV` classes into the OpenML trace ARFF format
207+
208+ :param arff_tracecontent: the list that the results should be appended to
209+ :param fold_no: cv fold number
210+ :param model: the model to extract from
211+ :param rep_no: cv repetition number
212+
213+ :return: A list of lists, each representing an arff line
214+ '''
189215 for itt_no in range (0 , len (model .cv_results_ ['mean_test_score' ])):
190216 # we use the string values for True and False, as it is defined in this way by the OpenML server
191217 selected = 'false'
@@ -350,7 +376,7 @@ def _get_cached_run(run_id):
350376 run_file = os .path .join (run_cache_dir ,
351377 "run_%d.xml" % int (run_id ))
352378 with io .open (run_file , encoding = 'utf8' ) as fh :
353- run = _create_task_from_xml (xml = fh .read ())
379+ run = _create_run_from_xml (xml = fh .read ())
354380 return run
355381
356382 except (OSError , IOError ):
0 commit comments