55import numpy as np
66import warnings
77import sklearn
8+ import time
9+ from sklearn .model_selection ._search import BaseSearchCV
810
911from ..exceptions import PyOpenMLError
1012from .. import config
1113from ..flows import sklearn_to_flow , get_flow , flow_exists
1214from ..setups import setup_exists
1315from ..exceptions import OpenMLCacheException , OpenMLServerException
14- from ..util import URLError
16+ from ..util import URLError , version_complies
1517from ..tasks .functions import _create_task_from_xml
1618from .._api_calls import _perform_api_call
1719from .run import OpenMLRun
@@ -68,7 +70,6 @@ def run_task(task, model, avoid_duplicate_runs=True):
6870 run = OpenMLRun (task_id = task .task_id , flow_id = None , dataset_id = dataset .dataset_id , model = model )
6971 run .data_content , run .trace_content , run .trace_attributes = _run_task_get_arffcontent (model , task , class_labels )
7072
71-
7273 if flow_id == False :
7374 # means the flow did not exist. As we could run it, publish it now
7475 flow = flow .publish ()
@@ -151,6 +152,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
151152 X , Y = task .get_X_and_y ()
152153 arff_datacontent = []
153154 arff_tracecontent = []
155+ user_defined_measures = defaultdict (lambda : defaultdict (dict ))
154156
155157 rep_no = 0
156158 # TODO use different iterator to only provide a single iterator (less
@@ -166,8 +168,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
166168 testY = Y [test_indices ]
167169
168170 try :
171+ # for measuring runtime. Only available since Python 3.3
172+ if version_complies (3 , 3 ):
173+ modelfit_starttime = time .process_time ()
169174 model_fold .fit (trainX , trainY )
170175
176+ if version_complies (3 , 3 ):
177+ modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
178+ user_defined_measures ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
179+
171180 if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
172181 arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
173182 model_classes = model_fold .best_estimator_ .classes_
@@ -177,8 +186,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
177186 # typically happens when training a regressor on classification task
178187 raise PyOpenMLError (str (e ))
179188
189+ if version_complies (3 , 3 ):
190+ modelpredict_starttime = time .process_time ()
180191 ProbaY = model_fold .predict_proba (testX )
181192 PredY = model_fold .predict (testX )
193+ if version_complies (3 , 3 ):
194+ modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
195+ user_defined_measures ['usercpu_time_millis_testing' ][rep_no ][fold_no ] = modelpredict_duration
196+ user_defined_measures ['usercpu_time_millis' ][rep_no ][fold_no ] = modelfit_duration + modelpredict_duration
197+
182198 if ProbaY .shape [1 ] != len (class_labels ):
183199 warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
184200
@@ -195,7 +211,6 @@ def _run_task_get_arffcontent(model, task, class_labels):
195211 else :
196212 arff_tracecontent = None
197213 arff_trace_attributes = None
198-
199214 return arff_datacontent , arff_tracecontent , arff_trace_attributes
200215
201216
@@ -397,7 +412,7 @@ def _get_cached_run(run_id):
397412 run_file = os .path .join (run_cache_dir ,
398413 "run_%d.xml" % int (run_id ))
399414 with io .open (run_file , encoding = 'utf8' ) as fh :
400- run = _create_task_from_xml (xml = fh .read ())
415+ run = _create_run_from_xml (xml = fh .read ())
401416 return run
402417
403418 except (OSError , IOError ):
0 commit comments