Skip to content

Commit 7621bff

Browse files
committed
extended detailed evaluations check
1 parent 2975384 commit 7621bff

1 file changed

Lines changed: 32 additions & 18 deletions

File tree

tests/test_runs/test_run_functions.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,33 @@ def _perform_run(self, task_id, num_instances, clf, check_setup=True):
122122

123123
return run
124124

125+
126+
def _check_detailed_evaluations(self, detailed_evaluations, num_repeats, num_folds):
    '''
    Check that the expected timing measures are attached to a run (before upload).

    Asserts that ``detailed_evaluations`` is a dict and, on Python >= 3.3,
    that its keys are exactly the three ``usercpu_time_millis*`` measures,
    that every measure holds ``num_repeats`` entries of ``num_folds`` values
    each, and that every value is a float strictly between 0 and 360.

    On Python < 3.3 only the dict type check is performed.

    Callers should skip this check when check_n_jobs(clf) is false; that
    condition has to be tested outside of this function.

    Parameters
    ----------
    detailed_evaluations : dict
        Accessed as ``detailed_evaluations[measure][repeat][fold]``.
    num_repeats : int
        Expected number of entries per measure.
    num_folds : int
        Expected number of entries per repeat.

    Raises
    ------
    AssertionError
        Raised through the ``self.assert*`` helpers when a check fails.
    '''
    timing_measures = {
        'usercpu_time_millis_testing',
        'usercpu_time_millis_training',
        'usercpu_time_millis',
    }

    self.assertIsInstance(detailed_evaluations, dict)
    if sys.version_info[:2] < (3, 3):
        # Timing measures are only verified on Python >= 3.3.
        return

    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(set(detailed_evaluations.keys()), timing_measures)
    for measure in timing_measures:
        per_repeat = detailed_evaluations[measure]
        num_rep_entries = len(per_repeat)
        self.assertEqual(num_rep_entries, num_repeats)
        # Index by number instead of iterating the container directly, so
        # that both sequences and int-keyed mappings are handled the same way.
        for rep in range(num_rep_entries):
            per_fold = per_repeat[rep]
            num_fold_entries = len(per_fold)
            self.assertEqual(num_fold_entries, num_folds)
            for fold in range(num_fold_entries):
                evaluation = per_fold[fold]
                self.assertIsInstance(evaluation, float)
                # should take at least one millisecond (?)
                self.assertGreater(evaluation, 0)
                # Pessimistic upper bound.
                # NOTE(review): the previous comment called 360 "5 minutes",
                # but the measure names say milliseconds -- confirm the
                # intended unit and bound.
                self.assertLess(evaluation, 360)
150+
151+
125152
def test_run_regression_on_classif_task(self):
126153
task_id = 115
127154

@@ -192,6 +219,7 @@ def test_run_and_upload(self):
192219
self.assertTrue(check_res)
193220

194221
# todo: check if runtime is present
222+
self._check_detailed_evaluations(run.detailed_evaluations, 1, num_folds)
195223
pass
196224

197225

@@ -223,7 +251,6 @@ def test_get_run_trace(self):
223251
num_iterations = 10
224252
num_folds = 1
225253
task_id = 119
226-
run_id = None
227254

228255
task = openml.tasks.get_task(task_id)
229256
# IMPORTANT! Do not sentinel this flow; it is faster if we don't wait on the OpenML server
@@ -257,7 +284,9 @@ def test_get_run_trace(self):
257284
self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds)
258285

259286
def test__run_exists(self):
260-
# would be better to not sentinel these clfs ..
287+
# would be better to not sentinel these clfs,
288+
# so we do not have to perform the actual runs
289+
# and can just check their status online
261290
clfs = [sklearn.pipeline.Pipeline(steps=[('Imputer', Imputer(strategy='mean')),
262291
('VarianceThreshold', VarianceThreshold(threshold=0.05)),
263292
('Estimator', GaussianNB())]),
@@ -431,7 +460,6 @@ def test_run_with_classifiers_in_param_grid(self):
431460
task=task, model=clf, avoid_duplicate_runs=False)
432461

433462
def test__run_task_get_arffcontent(self):
434-
timing_measures = {'usercpu_time_millis_testing', 'usercpu_time_millis_training', 'usercpu_time_millis'}
435463
task = openml.tasks.get_task(7)
436464
class_labels = task.class_labels
437465
num_instances = 3196
@@ -452,21 +480,7 @@ def test__run_task_get_arffcontent(self):
452480
# trace. SGD does not produce any
453481
self.assertIsInstance(arff_tracecontent, type(None))
454482

455-
self.assertIsInstance(detailed_evaluations, dict)
456-
if sys.version_info[:2] >= (3, 3): # check_n_jobs follows from the used clf:
457-
self.assertEquals(set(detailed_evaluations.keys()), timing_measures)
458-
for measure in timing_measures:
459-
num_rep_entrees = len(detailed_evaluations[measure])
460-
self.assertEquals(num_rep_entrees, num_repeats)
461-
for rep in range(num_rep_entrees):
462-
num_fold_entrees = len(detailed_evaluations[measure][rep])
463-
self.assertEquals(num_fold_entrees, num_folds)
464-
for fold in range(num_fold_entrees):
465-
evaluation = detailed_evaluations[measure][rep][fold]
466-
self.assertIsInstance(evaluation, float)
467-
self.assertGreater(evaluation, 0) # should take at least one millisecond (?)
468-
self.assertLess(evaluation, 60) # pessimistic
469-
483+
self._check_detailed_evaluations(detailed_evaluations, num_repeats, num_folds)
470484

471485
# 10 times 10 fold CV of 150 samples
472486
self.assertEqual(len(arff_datacontent), num_instances * num_repeats)

0 commit comments

Comments
 (0)