@@ -106,7 +106,13 @@ def run_flow_on_task(task, flow, avoid_duplicate_runs=True, flow_tags=None,
106106 dataset_id = dataset .dataset_id , model = flow .model , tags = tags )
107107 run .parameter_settings = OpenMLRun ._parse_parameters (flow )
108108
109- run .data_content , run .trace_content , run .trace_attributes , run .detailed_evaluations = res
109+ run .data_content , run .trace_content , run .trace_attributes , fold_evaluations , sample_evaluations = res
110+ # now we need to attach the detailed evaluations
111+ if task .task_type_id == 3 :
112+ run .sample_evaluations = sample_evaluations
113+ else :
114+ run .fold_evaluations = fold_evaluations
115+
110116
111117 config .logger .info ('Executed Task %d with Flow id: %d' % (task .task_id , run .flow_id ))
112118
@@ -299,15 +305,20 @@ def _seed_current_object(current_value):
299305 return model
300306
301307
302- def _prediction_to_row (rep_no , fold_no , row_id , correct_label , predicted_label ,
303- predicted_probabilities , class_labels , model_classes_mapping ):
308+ def _prediction_to_row (rep_no , fold_no , sample_no , row_id , correct_label ,
309+ predicted_label , predicted_probabilities , class_labels ,
310+ model_classes_mapping ):
304311 """Util function that turns probability estimates of a classifier for a given
305312 instance into the right arff format to upload to openml.
306313
307314 Parameters
308315 ----------
309316 rep_no : int
317+ The index of the repeat of the experiment (0-based; always 0 when the cross-validation is not repeated)
310318 fold_no : int
319+ The index of the fold of the experiment (0-based; always 0 in case of holdout)
320+ sample_no : int
321+ The index of the subsample, used for learning curves (0-based; always 0 when there is no learning curve)
311322 row_id : int
312323 row id in the initial dataset
313324 correct_label : str
@@ -328,11 +339,12 @@ def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
328339 """
329340 if not isinstance (rep_no , (int , np .integer )): raise ValueError ('rep_no should be int' )
330341 if not isinstance (fold_no , (int , np .integer )): raise ValueError ('fold_no should be int' )
342+ if not isinstance (sample_no , (int , np .integer )): raise ValueError ('sample_no should be int' )
331343 if not isinstance (row_id , (int , np .integer )): raise ValueError ('row_id should be int' )
332344 if not len (predicted_probabilities ) == len (model_classes_mapping ):
333345 raise ValueError ('len(predicted_probabilities) != len(class_labels)' )
334346
335- arff_line = [rep_no , fold_no , row_id ]
347+ arff_line = [rep_no , fold_no , sample_no , row_id ]
336348 for class_label_idx in range (len (class_labels )):
337349 if class_label_idx in model_classes_mapping :
338350 index = np .where (model_classes_mapping == class_label_idx )[0 ][0 ] # TODO: WHY IS THIS 2D???
@@ -349,82 +361,93 @@ def _run_task_get_arffcontent(model, task, class_labels):
349361 X , Y = task .get_X_and_y ()
350362 arff_datacontent = []
351363 arff_tracecontent = []
352- user_defined_measures = defaultdict (lambda : defaultdict (dict ))
364+ user_defined_measures_fold = defaultdict (lambda : defaultdict (dict ))
365+ user_defined_measures_sample = defaultdict (lambda : defaultdict (lambda : defaultdict (dict )))
353366
354- rep_no = 0
355367 # sys.version_info returns a tuple, the following line compares the entry of tuples
356368 # https://docs.python.org/3.6/reference/expressions.html#value-comparisons
357369 can_measure_runtime = sys .version_info [:2 ] >= (3 , 3 ) and _check_n_jobs (model )
358370 # TODO use different iterator to only provide a single iterator (less
359371 # methods, less maintenance, less confusion)
360- for rep in task .iterate_repeats ():
361- fold_no = 0
362- for fold in rep :
363- model_fold = sklearn .base .clone (model , safe = True )
364- train_indices , test_indices = fold
365- trainX = X [train_indices ]
366- trainY = Y [train_indices ]
367- testX = X [test_indices ]
368- testY = Y [test_indices ]
369-
370- try :
371- # for measuring runtime. Only available since Python 3.3
372- if can_measure_runtime :
373- modelfit_starttime = time .process_time ()
374- model_fold .fit (trainX , trainY )
375-
376- if can_measure_runtime :
377- modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
378- user_defined_measures ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
379- except AttributeError as e :
380- # typically happens when training a regressor on classification task
381- raise PyOpenMLError (str (e ))
382-
383- # extract trace, if applicable
384- if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
385- arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
386-
387- # search for model classes_ (might differ depending on modeltype)
388- # first, pipelines are a special case (these don't have a classes_
389- # object, but rather borrows it from the last step. We do this manually,
390- # because of the BaseSearch check)
391- if isinstance (model_fold , sklearn .pipeline .Pipeline ):
392- used_estimator = model_fold .steps [- 1 ][- 1 ]
393- else :
394- used_estimator = model_fold
372+ num_reps , num_folds , num_samples = task .get_split_dimensions ()
373+
374+ for rep_no in range (num_reps ):
375+ for fold_no in range (num_folds ):
376+ for sample_no in range (num_samples ):
377+ model_fold = sklearn .base .clone (model , safe = True )
378+ train_indices , test_indices = task .get_train_test_split_indices (repeat = rep_no ,
379+ fold = fold_no ,
380+ sample = sample_no )
381+ trainX = X [train_indices ]
382+ trainY = Y [train_indices ]
383+ testX = X [test_indices ]
384+ testY = Y [test_indices ]
385+
386+ try :
387+ # for measuring runtime. Only available since Python 3.3
388+ if can_measure_runtime :
389+ modelfit_starttime = time .process_time ()
390+ model_fold .fit (trainX , trainY )
391+
392+ if can_measure_runtime :
393+ modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
394+ user_defined_measures_sample ['usercpu_time_millis_training' ][rep_no ][fold_no ][sample_no ] = modelfit_duration
395+ user_defined_measures_fold ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
396+ except AttributeError as e :
397+ # typically happens when training a regressor on classification task
398+ raise PyOpenMLError (str (e ))
399+
400+ # extract trace, if applicable
401+ if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
402+ arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
403+
404+ # search for model classes_ (might differ depending on modeltype)
405+ # first, pipelines are a special case (these don't have a classes_
406+ # object, but rather borrows it from the last step. We do this manually,
407+ # because of the BaseSearch check)
408+ if isinstance (model_fold , sklearn .pipeline .Pipeline ):
409+ used_estimator = model_fold .steps [- 1 ][- 1 ]
410+ else :
411+ used_estimator = model_fold
395412
396- if isinstance (used_estimator , sklearn .model_selection ._search .BaseSearchCV ):
397- model_classes = used_estimator .best_estimator_ .classes_
398- else :
399- model_classes = used_estimator .classes_
413+ if isinstance (used_estimator , sklearn .model_selection ._search .BaseSearchCV ):
414+ model_classes = used_estimator .best_estimator_ .classes_
415+ else :
416+ model_classes = used_estimator .classes_
400417
401- if can_measure_runtime :
402- modelpredict_starttime = time .process_time ()
403-
404- ProbaY = model_fold .predict_proba (testX )
405- PredY = model_fold .predict (testX )
406- if can_measure_runtime :
407- modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
408- user_defined_measures ['usercpu_time_millis_testing' ][rep_no ][fold_no ] = modelpredict_duration
409- user_defined_measures ['usercpu_time_millis' ][rep_no ][fold_no ] = modelfit_duration + modelpredict_duration
418+ if can_measure_runtime :
419+ modelpredict_starttime = time .process_time ()
410420
411- if ProbaY .shape [1 ] != len (class_labels ):
412- warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
421+ ProbaY = model_fold .predict_proba (testX )
422+ PredY = model_fold .predict (testX )
423+ if can_measure_runtime :
424+ modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
425+ user_defined_measures_fold ['usercpu_time_millis_testing' ][rep_no ][fold_no ] = modelpredict_duration
426+ user_defined_measures_fold ['usercpu_time_millis' ][rep_no ][fold_no ] = modelfit_duration + modelpredict_duration
427+ user_defined_measures_sample ['usercpu_time_millis_testing' ][rep_no ][fold_no ][sample_no ] = modelpredict_duration
428+ user_defined_measures_sample ['usercpu_time_millis' ][rep_no ][fold_no ][sample_no ] = modelfit_duration + modelpredict_duration
413429
414- for i in range (0 , len (test_indices )):
415- arff_line = _prediction_to_row (rep_no , fold_no , test_indices [i ], class_labels [testY [i ]], PredY [i ], ProbaY [i ], class_labels , model_classes )
416- arff_datacontent .append (arff_line )
430+ if ProbaY .shape [1 ] != len (class_labels ):
431+ warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
417432
418- fold_no = fold_no + 1
419- rep_no = rep_no + 1
433+ for i in range (0 , len (test_indices )):
434+ arff_line = _prediction_to_row (rep_no , fold_no , sample_no ,
435+ test_indices [i ], class_labels [testY [i ]],
436+ PredY [i ], ProbaY [i ], class_labels , model_classes )
437+ arff_datacontent .append (arff_line )
420438
421439 if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
422440 # arff_tracecontent is already set
423441 arff_trace_attributes = _extract_arfftrace_attributes (model_fold )
424442 else :
425443 arff_tracecontent = None
426444 arff_trace_attributes = None
427- return arff_datacontent , arff_tracecontent , arff_trace_attributes , user_defined_measures
445+
446+ return arff_datacontent , \
447+ arff_tracecontent , \
448+ arff_trace_attributes , \
449+ user_defined_measures_fold , \
450+ user_defined_measures_sample
428451
429452
430453def _extract_arfftrace (model , rep_no , fold_no ):
0 commit comments