Skip to content

Commit b8fdd17

Browse files
committed
learning curve support
1 parent 3231e63 commit b8fdd17

4 files changed

Lines changed: 162 additions & 103 deletions

File tree

openml/runs/functions.py

Lines changed: 85 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,13 @@ def run_flow_on_task(task, flow, avoid_duplicate_runs=True, flow_tags=None,
106106
dataset_id=dataset.dataset_id, model=flow.model, tags=tags)
107107
run.parameter_settings = OpenMLRun._parse_parameters(flow)
108108

109-
run.data_content, run.trace_content, run.trace_attributes, run.detailed_evaluations = res
109+
run.data_content, run.trace_content, run.trace_attributes, fold_evaluations, sample_evaluations = res
110+
# now we need to attach the detailed evaluations
111+
if task.task_type_id == 3:
112+
run.sample_evaluations = sample_evaluations
113+
else:
114+
run.fold_evaluations = fold_evaluations
115+
110116

111117
config.logger.info('Executed Task %d with Flow id: %d' % (task.task_id, run.flow_id))
112118

@@ -299,15 +305,20 @@ def _seed_current_object(current_value):
299305
return model
300306

301307

302-
def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
303-
predicted_probabilities, class_labels, model_classes_mapping):
308+
def _prediction_to_row(rep_no, fold_no, sample_no, row_id, correct_label,
309+
predicted_label, predicted_probabilities, class_labels,
310+
model_classes_mapping):
304311
"""Util function that turns probability estimates of a classifier for a given
305312
instance into the right arff format to upload to openml.
306313
307314
Parameters
308315
----------
309316
rep_no : int
317+
The repeat of the experiment (0-based; in case of 1 time CV, always 0)
310318
fold_no : int
319+
The fold nr of the experiment (0-based; in case of holdout, always 0)
320+
sample_no : int
321+
In case of learning curves, the index of the subsample (0-based; in case of no learning curve, always 0)
311322
row_id : int
312323
row id in the initial dataset
313324
correct_label : str
@@ -328,11 +339,12 @@ def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
328339
"""
329340
if not isinstance(rep_no, (int, np.integer)): raise ValueError('rep_no should be int')
330341
if not isinstance(fold_no, (int, np.integer)): raise ValueError('fold_no should be int')
342+
if not isinstance(sample_no, (int, np.integer)): raise ValueError('sample_no should be int')
331343
if not isinstance(row_id, (int, np.integer)): raise ValueError('row_id should be int')
332344
if not len(predicted_probabilities) == len(model_classes_mapping):
333345
raise ValueError('len(predicted_probabilities) != len(class_labels)')
334346

335-
arff_line = [rep_no, fold_no, row_id]
347+
arff_line = [rep_no, fold_no, sample_no, row_id]
336348
for class_label_idx in range(len(class_labels)):
337349
if class_label_idx in model_classes_mapping:
338350
index = np.where(model_classes_mapping == class_label_idx)[0][0] # TODO: WHY IS THIS 2D???
@@ -349,82 +361,93 @@ def _run_task_get_arffcontent(model, task, class_labels):
349361
X, Y = task.get_X_and_y()
350362
arff_datacontent = []
351363
arff_tracecontent = []
352-
user_defined_measures = defaultdict(lambda: defaultdict(dict))
364+
user_defined_measures_fold = defaultdict(lambda: defaultdict(dict))
365+
user_defined_measures_sample = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
353366

354-
rep_no = 0
355367
# sys.version_info returns a tuple, the following line compares the entry of tuples
356368
# https://docs.python.org/3.6/reference/expressions.html#value-comparisons
357369
can_measure_runtime = sys.version_info[:2] >= (3, 3) and _check_n_jobs(model)
358370
# TODO use different iterator to only provide a single iterator (less
359371
# methods, less maintenance, less confusion)
360-
for rep in task.iterate_repeats():
361-
fold_no = 0
362-
for fold in rep:
363-
model_fold = sklearn.base.clone(model, safe=True)
364-
train_indices, test_indices = fold
365-
trainX = X[train_indices]
366-
trainY = Y[train_indices]
367-
testX = X[test_indices]
368-
testY = Y[test_indices]
369-
370-
try:
371-
# for measuring runtime. Only available since Python 3.3
372-
if can_measure_runtime:
373-
modelfit_starttime = time.process_time()
374-
model_fold.fit(trainX, trainY)
375-
376-
if can_measure_runtime:
377-
modelfit_duration = (time.process_time() - modelfit_starttime) * 1000
378-
user_defined_measures['usercpu_time_millis_training'][rep_no][fold_no] = modelfit_duration
379-
except AttributeError as e:
380-
# typically happens when training a regressor on classification task
381-
raise PyOpenMLError(str(e))
382-
383-
# extract trace, if applicable
384-
if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
385-
arff_tracecontent.extend(_extract_arfftrace(model_fold, rep_no, fold_no))
386-
387-
# search for model classes_ (might differ depending on modeltype)
388-
# first, pipelines are a special case (these don't have a classes_
389-
# object, but rather borrows it from the last step. We do this manually,
390-
# because of the BaseSearch check)
391-
if isinstance(model_fold, sklearn.pipeline.Pipeline):
392-
used_estimator = model_fold.steps[-1][-1]
393-
else:
394-
used_estimator = model_fold
372+
num_reps, num_folds, num_samples = task.get_split_dimensions()
373+
374+
for rep_no in range(num_reps):
375+
for fold_no in range(num_folds):
376+
for sample_no in range(num_samples):
377+
model_fold = sklearn.base.clone(model, safe=True)
378+
train_indices, test_indices = task.get_train_test_split_indices(repeat=rep_no,
379+
fold=fold_no,
380+
sample=sample_no)
381+
trainX = X[train_indices]
382+
trainY = Y[train_indices]
383+
testX = X[test_indices]
384+
testY = Y[test_indices]
385+
386+
try:
387+
# for measuring runtime. Only available since Python 3.3
388+
if can_measure_runtime:
389+
modelfit_starttime = time.process_time()
390+
model_fold.fit(trainX, trainY)
391+
392+
if can_measure_runtime:
393+
modelfit_duration = (time.process_time() - modelfit_starttime) * 1000
394+
user_defined_measures_sample['usercpu_time_millis_training'][rep_no][fold_no][sample_no] = modelfit_duration
395+
user_defined_measures_fold['usercpu_time_millis_training'][rep_no][fold_no] = modelfit_duration
396+
except AttributeError as e:
397+
# typically happens when training a regressor on classification task
398+
raise PyOpenMLError(str(e))
399+
400+
# extract trace, if applicable
401+
if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
402+
arff_tracecontent.extend(_extract_arfftrace(model_fold, rep_no, fold_no))
403+
404+
# search for model classes_ (might differ depending on modeltype)
405+
# first, pipelines are a special case (these don't have a classes_
406+
# object, but rather borrows it from the last step. We do this manually,
407+
# because of the BaseSearch check)
408+
if isinstance(model_fold, sklearn.pipeline.Pipeline):
409+
used_estimator = model_fold.steps[-1][-1]
410+
else:
411+
used_estimator = model_fold
395412

396-
if isinstance(used_estimator, sklearn.model_selection._search.BaseSearchCV):
397-
model_classes = used_estimator.best_estimator_.classes_
398-
else:
399-
model_classes = used_estimator.classes_
413+
if isinstance(used_estimator, sklearn.model_selection._search.BaseSearchCV):
414+
model_classes = used_estimator.best_estimator_.classes_
415+
else:
416+
model_classes = used_estimator.classes_
400417

401-
if can_measure_runtime:
402-
modelpredict_starttime = time.process_time()
403-
404-
ProbaY = model_fold.predict_proba(testX)
405-
PredY = model_fold.predict(testX)
406-
if can_measure_runtime:
407-
modelpredict_duration = (time.process_time() - modelpredict_starttime) * 1000
408-
user_defined_measures['usercpu_time_millis_testing'][rep_no][fold_no] = modelpredict_duration
409-
user_defined_measures['usercpu_time_millis'][rep_no][fold_no] = modelfit_duration + modelpredict_duration
418+
if can_measure_runtime:
419+
modelpredict_starttime = time.process_time()
410420

411-
if ProbaY.shape[1] != len(class_labels):
412-
warnings.warn("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" %(rep_no, fold_no, ProbaY.shape[1], len(class_labels)))
421+
ProbaY = model_fold.predict_proba(testX)
422+
PredY = model_fold.predict(testX)
423+
if can_measure_runtime:
424+
modelpredict_duration = (time.process_time() - modelpredict_starttime) * 1000
425+
user_defined_measures_fold['usercpu_time_millis_testing'][rep_no][fold_no] = modelpredict_duration
426+
user_defined_measures_fold['usercpu_time_millis'][rep_no][fold_no] = modelfit_duration + modelpredict_duration
427+
user_defined_measures_sample['usercpu_time_millis_testing'][rep_no][fold_no][sample_no] = modelpredict_duration
428+
user_defined_measures_sample['usercpu_time_millis'][rep_no][fold_no][sample_no] = modelfit_duration + modelpredict_duration
413429

414-
for i in range(0, len(test_indices)):
415-
arff_line = _prediction_to_row(rep_no, fold_no, test_indices[i], class_labels[testY[i]], PredY[i], ProbaY[i], class_labels, model_classes)
416-
arff_datacontent.append(arff_line)
430+
if ProbaY.shape[1] != len(class_labels):
431+
warnings.warn("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" %(rep_no, fold_no, ProbaY.shape[1], len(class_labels)))
417432

418-
fold_no = fold_no + 1
419-
rep_no = rep_no + 1
433+
for i in range(0, len(test_indices)):
434+
arff_line = _prediction_to_row(rep_no, fold_no, sample_no,
435+
test_indices[i], class_labels[testY[i]],
436+
PredY[i], ProbaY[i], class_labels, model_classes)
437+
arff_datacontent.append(arff_line)
420438

421439
if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
422440
# arff_tracecontent is already set
423441
arff_trace_attributes = _extract_arfftrace_attributes(model_fold)
424442
else:
425443
arff_tracecontent = None
426444
arff_trace_attributes = None
427-
return arff_datacontent, arff_tracecontent, arff_trace_attributes, user_defined_measures
445+
446+
return arff_datacontent, \
447+
arff_tracecontent, \
448+
arff_trace_attributes, \
449+
user_defined_measures_fold, \
450+
user_defined_measures_sample
428451

429452

430453
def _extract_arfftrace(model, rep_no, fold_no):

openml/runs/run.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class OpenMLRun(object):
2121
"""
2222
def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
2323
output_files=None, setup_id=None, tags=None, uploader=None, uploader_name=None,
24-
evaluations=None, detailed_evaluations=None,
24+
evaluations=None, fold_evaluations=None, sample_evaluations=None,
2525
data_content=None, trace_attributes=None, trace_content=None,
2626
model=None, task_type=None, task_evaluation_measure=None, flow_name=None,
2727
parameter_settings=None, predictions_url=None, task=None,
@@ -38,7 +38,8 @@ def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
3838
self.parameter_settings = parameter_settings
3939
self.dataset_id = dataset_id
4040
self.evaluations = evaluations
41-
self.detailed_evaluations = detailed_evaluations
41+
self.fold_evaluations = fold_evaluations
42+
self.sample_evaluations = sample_evaluations
4243
self.data_content = data_content
4344
self.output_files = output_files
4445
self.trace_attributes = trace_attributes
@@ -72,6 +73,7 @@ def _generate_arff_dict(self):
7273
arff_dict = {}
7374
arff_dict['attributes'] = [('repeat', 'NUMERIC'), # lowercase 'numeric' gives an error
7475
('fold', 'NUMERIC'),
76+
('sample', 'NUMERIC'),
7577
('row_id', 'NUMERIC')] + \
7678
[('confidence.' + class_labels[i], 'NUMERIC') for i in range(len(class_labels))] +\
7779
[('prediction', class_labels),
@@ -154,7 +156,8 @@ def _create_description_xml(self):
154156
setup_string=_create_setup_string(self.model),
155157
parameter_settings=self.parameter_settings,
156158
error_message=self.error_message,
157-
detailed_evaluations=self.detailed_evaluations,
159+
fold_evaluations=self.fold_evaluations,
160+
sample_evaluations=self.sample_evaluations,
158161
tags=self.tags)
159162
description_xml = xmltodict.unparse(description, pretty=True)
160163
return description_xml
@@ -284,7 +287,8 @@ def _get_version_information():
284287
return [python_version, sklearn_version, numpy_version, scipy_version]
285288

286289

287-
def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, tags=None, detailed_evaluations=None):
290+
def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings,
291+
tags=None, fold_evaluations=None, sample_evaluations=None):
288292
""" Creates a dictionary corresponding to the desired xml desired by openML
289293
290294
Parameters
@@ -298,7 +302,11 @@ def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, t
298302
tags : array of strings
299303
information that gives a description of the run, must conform to
300304
regex ``([a-zA-Z0-9_\-\.])+``
301-
305+
fold_evaluations : dict mapping from evaluation measure to a dict mapping repeat_nr
306+
to a dict mapping from fold nr to a value (double)
307+
sample_evaluations : dict mapping from evaluation measure to a dict mapping repeat_nr
308+
to a dict mapping from fold nr to a dict mapping from sample nr to a value (double)
309+

302310
Returns
303311
-------
304312
description : a dictionary corresponding to the XML description of the run
@@ -313,15 +321,25 @@ def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, t
313321
description['oml:run']['oml:parameter_setting'] = parameter_settings
314322
if tags is not None:
315323
description['oml:run']['oml:tag'] = tags # Tags describing the run
316-
if detailed_evaluations is not None:
324+
if fold_evaluations is not None or sample_evaluations is not None:
317325
description['oml:run']['oml:output_data'] = dict()
318326
description['oml:run']['oml:output_data']['oml:evaluation'] = list()
319-
for measure in detailed_evaluations:
320-
for repeat in detailed_evaluations[measure]:
321-
for fold, value in detailed_evaluations[measure][repeat].items():
327+
if fold_evaluations is not None:
328+
for measure in fold_evaluations:
329+
for repeat in fold_evaluations[measure]:
330+
for fold, value in fold_evaluations[measure][repeat].items():
322331
current = OrderedDict([('@repeat', str(repeat)), ('@fold', str(fold)),
323332
('oml:name', measure), ('oml:value', str(value))])
324333
description['oml:run']['oml:output_data']['oml:evaluation'].append(current)
334+
if sample_evaluations is not None:
335+
for measure in sample_evaluations:
336+
for repeat in sample_evaluations[measure]:
337+
for fold in sample_evaluations[measure][repeat]:
338+
for sample, value in sample_evaluations[measure][repeat][fold].items():
339+
current = OrderedDict([('@repeat', str(repeat)), ('@fold', str(fold)),
340+
('@sample', str(sample)), ('oml:name', measure),
341+
('oml:value', str(value))])
342+
description['oml:run']['oml:output_data']['oml:evaluation'].append(current)
325343
return description
326344

327345

0 commit comments

Comments
 (0)