@@ -888,35 +888,53 @@ def _format_external_version(
888888 ) -> str :
889889 return '%s==%s' % (model_package_name , model_package_version_number )
890890
891- def _check_n_jobs (self , model : Any ) -> bool :
892- """Returns True if the parameter settings of model are chosen s.t. the model
893- will run on a single core (if so, openml-python can measure runtimes)"""
894-
895- def check (param_grid , restricted_parameter_name , legal_values ):
896- if isinstance (param_grid , dict ):
897- for param , value in param_grid .items ():
898- # n_jobs is scikitlearn parameter for paralizing jobs
899- if param .split ('__' )[- 1 ] == restricted_parameter_name :
900- # 0 = illegal value (?), 1 / None = use one core,
901- # n = use n cores,
902- # -1 = use all available cores -> this makes it hard to
903- # measure runtime in a fair way
904- if legal_values is None or value not in legal_values :
905- return False
906- return True
907- elif isinstance (param_grid , list ):
908- return all (
909- check (sub_grid , restricted_parameter_name , legal_values )
910- for sub_grid in param_grid
911- )
891+ @staticmethod
892+ def _get_parameter_values_recursive (param_grid : Union [Dict , List [Dict ]],
893+ parameter_name : str ) -> List [Any ]:
894+ """
895+ Returns a list of values for a given hyperparameter, encountered
896+ recursively throughout the flow. (e.g., n_jobs can be defined
897+ for various flows)
912898
913- if not (
914- isinstance (model , sklearn .base .BaseEstimator ) or self .is_hpo_class (model )
915- ):
916- raise ValueError ('model should be BaseEstimator or BaseSearchCV' )
899+ Parameters
900+ ----------
901+ param_grid: Union[Dict, List[Dict]]
902+ Dict mapping from hyperparameter list to value, to a list of
903+ such dicts
904+
905+ parameter_name: str
906+ The hyperparameter that needs to be inspected
907+
908+ Returns
909+ -------
910+ List
911+ A list of all values of hyperparameters with this name
912+ """
913+ if isinstance (param_grid , dict ):
914+ result = list ()
915+ for param , value in param_grid .items ():
916+ # n_jobs is scikit-learn parameter for parallelizing jobs
917+ if param .split ('__' )[- 1 ] == parameter_name :
918+ result .append (value )
919+ return result
920+ elif isinstance (param_grid , list ):
921+ result = list ()
922+ for sub_grid in param_grid :
923+ result .extend (SklearnExtension ._get_parameter_values_recursive (sub_grid ,
924+ parameter_name ))
925+ return result
926+ else :
927+ raise ValueError ('Param_grid should either be a dict or list of dicts' )
917928
918- # make sure that n_jobs is not in the parameter grid of optimization
919- # procedure
929+ def _prevent_optimize_n_jobs (self , model ):
930+ """
931+ Ensures that HPO classes will not optimize the n_jobs hyperparameter
932+
933+ Parameters:
934+ -----------
935+ model:
936+ The model that will be fitted
937+ """
920938 if self .is_hpo_class (model ):
921939 if isinstance (model , sklearn .model_selection .GridSearchCV ):
922940 param_distributions = model .param_grid
@@ -933,13 +951,62 @@ def check(param_grid, restricted_parameter_name, legal_values):
933951 print ('Warning! Using subclass BaseSearchCV other than '
934952 '{GridSearchCV, RandomizedSearchCV}. '
935953 'Should implement param check. ' )
936-
937- if not check (param_distributions , 'n_jobs' , None ):
954+ n_jobs_vals = SklearnExtension ._get_parameter_values_recursive (param_distributions ,
955+ 'n_jobs' )
956+ if len (n_jobs_vals ) > 0 :
938957 raise PyOpenMLError ('openml-python should not be used to '
939958 'optimize the n_jobs parameter.' )
940959
def _can_measure_cputime(self, model: Any) -> bool:
    """
    Tell whether CPU time can be measured for this model, i.e. whether
    every n_jobs setting found in the model's parameters is None or 1.

    Parameters
    ----------
    model:
        The model that will be fitted

    Returns
    -------
    bool
        True if all n_jobs parameters are either None or 1 (also when the
        model has no n_jobs parameter at all), False otherwise

    Raises
    ------
    ValueError
        If the model is neither a scikit-learn BaseEstimator nor accepted
        by ``self.is_hpo_class``
    """
    is_supported_model = (
        isinstance(model, sklearn.base.BaseEstimator) or self.is_hpo_class(model)
    )
    if not is_supported_model:
        raise ValueError('model should be BaseEstimator or BaseSearchCV')

    # Gather every n_jobs value exposed by the model's parameters.
    n_jobs_settings = SklearnExtension._get_parameter_values_recursive(
        model.get_params(), 'n_jobs')
    # Any explicit value other than 1 rules out a CPU-time measurement.
    return not any(
        setting is not None and setting != 1 for setting in n_jobs_settings
    )
986+
def _can_measure_wallclocktime(self, model: Any) -> bool:
    """
    Tell whether wall-clock time can be measured for this model, i.e.
    whether none of the n_jobs settings found in the model's parameters
    equals -1.

    Parameters
    ----------
    model:
        The model that will be fitted

    Returns
    -------
    bool
        True if no n_jobs parameter is set to -1, False otherwise

    Raises
    ------
    ValueError
        If the model is neither a scikit-learn BaseEstimator nor accepted
        by ``self.is_hpo_class``
    """
    if (not isinstance(model, sklearn.base.BaseEstimator)
            and not self.is_hpo_class(model)):
        raise ValueError('model should be BaseEstimator or BaseSearchCV')

    # Gather every n_jobs value exposed by the model's parameters.
    collected = SklearnExtension._get_parameter_values_recursive(
        model.get_params(), 'n_jobs')
    # NOTE(review): only the literal value -1 is rejected; other negative
    # settings pass this check - confirm that this is intended.
    requests_all_cores = -1 in collected
    return not requests_all_cores
9431010
9441011 ################################################################################################
9451012 # Methods for performing runs with extension modules
@@ -1037,6 +1104,12 @@ def _run_model_on_fold(
10371104 """Run a model on a repeat,fold,subsample triplet of the task and return prediction
10381105 information.
10391106
1107+ Furthermore, it will measure run time measures in case multi-core behaviour allows this.
1108+ * exact user cpu time will be measured if the number of cores is set (recursive throughout
1109+ the model) exactly to 1
1110+ * wall clock time will be measured if the number of cores is set (recursive throughout the
1111+ model) to any given number (but not when it is set to -1)
1112+
10401113 Returns the data that is necessary to construct the OpenML Run object. Is used by
10411114 run_task_get_arff_content. Do not use this function unless you know what you are doing.
10421115
@@ -1112,8 +1185,11 @@ def _prediction_to_probabilities(
11121185 # but not desirable if we want to upload to OpenML).
11131186
11141187 model_copy = sklearn .base .clone (model , safe = True )
1188+ # sanity check: prohibit users from optimizing n_jobs
1189+ self ._prevent_optimize_n_jobs (model_copy )
11151190 # Runtime can be measured if the model is run sequentially
1116- can_measure_runtime = self ._check_n_jobs (model_copy )
1191+ can_measure_cputime = self ._can_measure_cputime (model_copy )
1192+ can_measure_wallclocktime = self ._can_measure_wallclocktime (model_copy )
11171193
11181194 train_indices , test_indices = task .get_train_test_split_indices (
11191195 repeat = rep_no , fold = fold_no , sample = sample_no )
@@ -1133,17 +1209,21 @@ def _prediction_to_probabilities(
11331209
11341210 try :
11351211 # for measuring runtime. Only available since Python 3.3
1136- if can_measure_runtime :
1137- modelfit_starttime = time .process_time ()
1212+ modelfit_start_cputime = time . process_time ()
1213+ modelfit_start_walltime = time .time ()
11381214
11391215 if isinstance (task , OpenMLSupervisedTask ):
11401216 model_copy .fit (train_x , train_y )
11411217 elif isinstance (task , OpenMLClusteringTask ):
11421218 model_copy .fit (train_x )
11431219
1144- if can_measure_runtime :
1145- modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
1146- user_defined_measures ['usercpu_time_millis_training' ] = modelfit_duration
1220+ modelfit_dur_cputime = (time .process_time () - modelfit_start_cputime ) * 1000
1221+ if can_measure_cputime :
1222+ user_defined_measures ['usercpu_time_millis_training' ] = modelfit_dur_cputime
1223+
1224+ modelfit_dur_walltime = (time .time () - modelfit_start_walltime ) * 1000
1225+ if can_measure_wallclocktime :
1226+ user_defined_measures ['wall_clock_time_millis_training' ] = modelfit_dur_walltime
11471227
11481228 except AttributeError as e :
11491229 # typically happens when training a regressor on classification task
@@ -1169,17 +1249,24 @@ def _prediction_to_probabilities(
11691249 else :
11701250 model_classes = used_estimator .classes_
11711251
1172- if can_measure_runtime :
1173- modelpredict_starttime = time .process_time ()
1252+ modelpredict_start_cputime = time . process_time ()
1253+ modelpredict_start_walltime = time .time ()
11741254
11751255 # In supervised learning this returns the predictions for Y, in clustering
11761256 # it returns the clusters
11771257 pred_y = model_copy .predict (test_x )
11781258
1179- if can_measure_runtime :
1180- modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
1181- user_defined_measures ['usercpu_time_millis_testing' ] = modelpredict_duration
1182- user_defined_measures ['usercpu_time_millis' ] = modelfit_duration + modelpredict_duration
1259+ if can_measure_cputime :
1260+ modelpredict_duration_cputime = (time .process_time ()
1261+ - modelpredict_start_cputime ) * 1000
1262+ user_defined_measures ['usercpu_time_millis_testing' ] = modelpredict_duration_cputime
1263+ user_defined_measures ['usercpu_time_millis' ] = (modelfit_dur_cputime
1264+ + modelpredict_duration_cputime )
1265+ if can_measure_wallclocktime :
1266+ modelpredict_duration_walltime = (time .time () - modelpredict_start_walltime ) * 1000
1267+ user_defined_measures ['wall_clock_time_millis_testing' ] = modelpredict_duration_walltime
1268+ user_defined_measures ['wall_clock_time_millis' ] = (modelfit_dur_walltime
1269+ + modelpredict_duration_walltime )
11831270
11841271 # add client-side calculated metrics. These is used on the server as
11851272 # consistency check, only useful for supervised tasks
0 commit comments