3434
3535
3636class TestRun (TestBase ):
37+ _multiprocess_can_split_ = True
3738
3839 def _wait_for_processed_run (self , run_id , max_waiting_time_seconds ):
3940 # it can take a while for a run to be processed on the OpenML (test) server
@@ -267,46 +268,81 @@ def test__publish_flow_if_necessary(self):
267268 openml .runs .functions ._publish_flow_if_necessary (flow2 )
268269 self .assertEqual (flow2 .flow_id , flow .flow_id )
269270
270- def test_run_and_upload (self ):
271- # This unit test is ment to test the following functions, using a varity of flows:
272- # - openml.runs.run_task()
273- # - openml.runs.OpenMLRun.publish()
274- # - openml.runs.initialize_model()
275- # - [implicitly] openml.setups.initialize_model()
276- # - openml.runs.initialize_model_from_trace()
277- task_id = 119 # diabates dataset
278- num_test_instances = 253 # 33% holdout task
279- num_folds = 1 # because of holdout
280- num_iterations = 5 # for base search classifiers
281-
282- clfs = []
283- random_state_fixtures = []
271+ ############################################################################
272+ # These unit tests are meant to test the following functions, using a variety
273+ # of flows:
274+ # - openml.runs.run_task()
275+ # - openml.runs.OpenMLRun.publish()
276+ # - openml.runs.initialize_model()
277+ # - [implicitly] openml.setups.initialize_model()
278+ # - openml.runs.initialize_model_from_trace()
279+ # They're split among several actual functions to allow for parallel
280+ # execution of the unit tests without the need to add an additional module
281+ # like unittest2
282+
def _run_and_upload(self, clf, rsv):
    """Run ``clf`` on the shared holdout task and verify the resulting run.

    The run is produced by ``self._perform_run`` and then checked in three
    ways: the locally recomputed accuracy must match the
    ``predictive_accuracy`` fold evaluations stored on the run, the
    optimization trace must have the expected length when ``clf`` itself
    is a search estimator, and the fold evaluations must pass
    ``self._check_fold_evaluations``.

    Parameters
    ----------
    clf : estimator
        The model (or pipeline / search wrapper) to run on the task.
    rsv : str
        Forwarded to ``self._perform_run`` as ``random_state_value``.
    """
    task_id = 119  # diabetes dataset
    num_test_instances = 253  # 33% holdout task
    num_folds = 1  # because of holdout
    # Trace length expected from a non-grid search; callers passing such a
    # search as ``clf`` must configure it with this many iterations.
    num_iterations = 5

    run = self._perform_run(task_id, num_test_instances, clf,
                            random_state_value=rsv)

    # obtain accuracy scores using get_metric_fn:
    accuracy_scores = run.get_metric_fn(sklearn.metrics.accuracy_score)
    # compare with the scores in user defined measures
    accuracy_scores_provided = [
        score
        for folds in run.fold_evaluations['predictive_accuracy'].values()
        for score in folds.values()
    ]
    self.assertEqual(sum(accuracy_scores_provided), sum(accuracy_scores))

    if isinstance(clf, BaseSearchCV):
        if isinstance(clf, GridSearchCV):
            # An exhaustive grid evaluates every combination, so the
            # expected trace length is the product of the grid sizes.
            grid_iterations = 1
            for param in clf.param_grid:
                grid_iterations *= len(clf.param_grid[param])
            self.assertEqual(len(run.trace_content),
                             grid_iterations * num_folds)
        else:
            self.assertEqual(len(run.trace_content),
                             num_iterations * num_folds)
        check_res = self._check_serialized_optimized_run(run.run_id)
        self.assertTrue(check_res)

    # todo: check if runtime is present
    self._check_fold_evaluations(run.fold_evaluations, 1, num_folds)
318+
def test_run_and_upload_logistic_regression(self):
    """Run the shared run-and-upload checks on a default LogisticRegression."""
    self._run_and_upload(LogisticRegression(), '62501')
322+
def test_run_and_upload_pipeline1(self):
    """Run the shared run-and-upload checks on a scaler + dummy pipeline."""
    steps = [
        ('scaler', StandardScaler(with_mean=False)),
        ('dummy', DummyClassifier(strategy='prior')),
    ]
    self._run_and_upload(Pipeline(steps=steps), '62501')
293328
def test_run_and_upload_pipeline2(self):
    """Run the shared checks on a pipeline ending in a randomized search.

    The search is nested inside the pipeline, so the object handed to
    ``_run_and_upload`` is a ``Pipeline``.
    """
    imputer = Imputer(strategy='median')
    variance_filter = VarianceThreshold()
    tree_search = RandomizedSearchCV(
        DecisionTreeClassifier(),
        {'min_samples_split': [2 ** x for x in range(1, 7 + 1)],
         'min_samples_leaf': [2 ** x for x in range(0, 6 + 1)]},
        cv=3, n_iter=10)
    pipeline2 = Pipeline(steps=[
        ('Imputer', imputer),
        ('VarianceThreshold', variance_filter),
        ('Estimator', tree_search),
    ])
    self._run_and_upload(pipeline2, '62501')
303338
def test_run_and_upload_gridsearch(self):
    """Run the shared checks on a grid search over a bagged SVC (3 x 3 grid)."""
    param_grid = {
        "base_estimator__C": [0.01, 0.1, 10],
        "base_estimator__gamma": [0.01, 0.1, 10],
    }
    bagged_svc = BaggingClassifier(base_estimator=SVC())
    self._run_and_upload(GridSearchCV(bagged_svc, param_grid), '62501')
309344
345+ def test_run_and_upload_randomsearch (self ):
310346 randomsearch = RandomizedSearchCV (
311347 RandomForestClassifier (n_estimators = 5 ),
312348 {"max_depth" : [3 , None ],
@@ -316,60 +352,34 @@ def test_run_and_upload(self):
316352 "bootstrap" : [True , False ],
317353 "criterion" : ["gini" , "entropy" ]},
318354 cv = StratifiedKFold (n_splits = 2 , shuffle = True ),
319- n_iter = num_iterations )
320-
321- clfs .append (randomsearch )
355+ n_iter = 5 )
322356 # The random states for the RandomizedSearchCV is set after the
323357 # random state of the RandomForestClassifier is set, therefore,
324358 # it has a different value than the other examples before
325- random_state_fixtures .append ('12172' )
326-
327- for clf , rsv in zip (clfs , random_state_fixtures ):
328- run = self ._perform_run (task_id , num_test_instances , clf ,
329- random_state_value = rsv )
330-
331- # obtain accuracy scores using get_metric_score:
332- accuracy_scores = run .get_metric_fn (sklearn .metrics .accuracy_score )
333- # compare with the scores in user defined measures
334- accuracy_scores_provided = []
335- for rep in run .fold_evaluations ['predictive_accuracy' ].keys ():
336- for fold in run .fold_evaluations ['predictive_accuracy' ][rep ].keys ():
337- accuracy_scores_provided .append (run .fold_evaluations ['predictive_accuracy' ][rep ][fold ])
338- self .assertEquals (sum (accuracy_scores_provided ), sum (accuracy_scores ))
339-
340- if isinstance (clf , BaseSearchCV ):
341- if isinstance (clf , GridSearchCV ):
342- grid_iterations = 1
343- for param in clf .param_grid :
344- grid_iterations *= len (clf .param_grid [param ])
345- self .assertEqual (len (run .trace_content ), grid_iterations * num_folds )
346- else :
347- self .assertEqual (len (run .trace_content ), num_iterations * num_folds )
348- check_res = self ._check_serialized_optimized_run (run .run_id )
349- self .assertTrue (check_res )
350-
351- # todo: check if runtime is present
352- self ._check_fold_evaluations (run .fold_evaluations , 1 , num_folds )
353- pass
354-
355- def test_learning_curve_task (self ):
359+ self ._run_and_upload (randomsearch , '12172' )
360+
361+ ############################################################################
362+
def test_learning_curve_task_1(self):
    """Run a scaler + dummy pipeline on the learning-curve task.

    The resulting run's sample evaluations are validated with
    ``self._check_sample_evaluations``.
    """
    task_id = 801  # diabetes dataset
    num_test_instances = 6144  # for learning curve
    # Expected layout of the sample evaluations: repeats, folds, samples.
    num_repeats, num_folds, num_samples = 1, 10, 8

    steps = [
        ('scaler', StandardScaler(with_mean=False)),
        ('dummy', DummyClassifier(strategy='prior')),
    ]
    run = self._perform_run(task_id, num_test_instances,
                            Pipeline(steps=steps),
                            random_state_value='62501')
    self._check_sample_evaluations(run.sample_evaluations, num_repeats,
                                   num_folds, num_samples)
376+
377+ def test_learning_curve_task_2 (self ):
378+ task_id = 801 # diabates dataset
379+ num_test_instances = 6144 # for learning curve
380+ num_repeats = 1
381+ num_folds = 10
382+ num_samples = 8
373383
374384 pipeline2 = Pipeline (steps = [('Imputer' , Imputer (strategy = 'median' )),
375385 ('VarianceThreshold' , VarianceThreshold ()),
@@ -378,16 +388,10 @@ def test_learning_curve_task(self):
378388 {'min_samples_split' : [2 ** x for x in range (1 , 7 + 1 )],
379389 'min_samples_leaf' : [2 ** x for x in range (0 , 6 + 1 )]},
380390 cv = 3 , n_iter = 10 ))])
381- clfs .append (pipeline2 )
382- random_state_fixtures .append ('62501' )
383-
384-
385- for clf , rsv in zip (clfs , random_state_fixtures ):
386- run = self ._perform_run (task_id , num_test_instances , clf ,
387- random_state_value = rsv )
388-
389- # todo: check if runtime is present
390- self ._check_sample_evaluations (run .sample_evaluations , num_repeats , num_folds , num_samples )
391+ run = self ._perform_run (task_id , num_test_instances , pipeline2 ,
392+ random_state_value = '62501' )
393+ self ._check_sample_evaluations (run .sample_evaluations , num_repeats ,
394+ num_folds , num_samples )
391395
392396 def test_initialize_cv_from_run (self ):
393397 randomsearch = RandomizedSearchCV (
@@ -455,7 +459,6 @@ def test_online_run_metric_score(self):
455459 run = openml .runs .get_run (5965513 ) # important to use binary classification task, due to assertions
456460 self ._test_local_evaluations (run )
457461
458-
459462 def test_initialize_model_from_run (self ):
460463 clf = sklearn .pipeline .Pipeline (steps = [('Imputer' , Imputer (strategy = 'median' )),
461464 ('VarianceThreshold' , VarianceThreshold (threshold = 0.05 )),
0 commit comments