55import numpy as np
66import warnings
77import sklearn
8+ import time
9+ from sklearn .model_selection ._search import BaseSearchCV
810
911from ..exceptions import PyOpenMLError
1012from .. import config
1113from ..flows import sklearn_to_flow , get_flow , flow_exists
1214from ..setups import setup_exists
1315from ..exceptions import OpenMLCacheException , OpenMLServerException
14- from ..util import URLError
16+ from ..util import URLError , version_complies
1517from ..tasks .functions import _create_task_from_xml
1618from .._api_calls import _perform_api_call
1719from .run import OpenMLRun , _get_version_information
@@ -155,6 +157,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
155157 X , Y = task .get_X_and_y ()
156158 arff_datacontent = []
157159 arff_tracecontent = []
160+ user_defined_measures = defaultdict (lambda : defaultdict (dict ))
158161
159162 rep_no = 0
160163 # TODO use different iterator to only provide a single iterator (less
@@ -170,20 +173,41 @@ def _run_task_get_arffcontent(model, task, class_labels):
170173 testY = Y [test_indices ]
171174
172175 try :
176+ # measure CPU time spent in fit(); time.process_time() is only available since Python 3.3
177+ if version_complies (3 , 3 ):
178+ modelfit_starttime = time .process_time ()
173179 model_fold .fit (trainX , trainY )
180+
181+ if version_complies (3 , 3 ):
182+ modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
183+ user_defined_measures ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
184+
185+ if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
186+ arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
187+ model_classes = model_fold .best_estimator_ .classes_
188+ else :
189+ model_classes = model_fold .classes_
174190 except AttributeError as e :
175191 # typically happens when training a regressor on a classification task
176192 raise PyOpenMLError (str (e ))
177-
193+
178194 # extract trace
179195 if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
180196 arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
181197 model_classes = model_fold .best_estimator_ .classes_
182198 else :
183199 model_classes = model_fold .classes_
184200
201+ if version_complies (3 , 3 ):
202+ modelpredict_starttime = time .process_time ()
203+
185204 ProbaY = model_fold .predict_proba (testX )
186205 PredY = model_fold .predict (testX )
206+ if version_complies (3 , 3 ):
207+ modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
208+ user_defined_measures ['usercpu_time_millis_testing' ][rep_no ][fold_no ] = modelpredict_duration
209+ user_defined_measures ['usercpu_time_millis' ][rep_no ][fold_no ] = modelfit_duration + modelpredict_duration
210+
187211 if ProbaY .shape [1 ] != len (class_labels ):
188212 warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
189213
@@ -200,7 +224,6 @@ def _run_task_get_arffcontent(model, task, class_labels):
200224 else :
201225 arff_tracecontent = None
202226 arff_trace_attributes = None
203-
204227 return arff_datacontent , arff_tracecontent , arff_trace_attributes
205228
206229
@@ -423,7 +446,7 @@ def _get_cached_run(run_id):
423446 run_file = os .path .join (run_cache_dir ,
424447 "run_%d.xml" % int (run_id ))
425448 with io .open (run_file , encoding = 'utf8' ) as fh :
426- run = _create_task_from_xml (xml = fh .read ())
449+ run = _create_run_from_xml (xml = fh .read ())
427450 return run
428451
429452 except (OSError , IOError ):
0 commit comments