3232 StratifiedKFold
3333from sklearn .pipeline import Pipeline
3434
35- if sys .version_info [0 ] >= 3 :
36- from unittest import mock
37- else :
38- import mock
39-
4035
4136class TestRun (TestBase ):
4237
@@ -219,34 +214,50 @@ def test_run_and_upload(self):
219214 num_folds = 1 # because of holdout
220215 num_iterations = 5 # for base search classifiers
221216
222- clfs = [LogisticRegression (),
223- Pipeline (steps = [('scaler' , StandardScaler (with_mean = False )),
224- ('dummy' , DummyClassifier (strategy = 'prior' ))]),
225- Pipeline (steps = [('Imputer' , Imputer (strategy = 'median' )),
226- ('VarianceThreshold' , VarianceThreshold ()),
227- ('Estimator' , RandomizedSearchCV (
228- DecisionTreeClassifier (),
229- {'min_samples_split' : [2 ** x for x in range (1 , 7 + 1 )],
230- 'min_samples_leaf' : [2 ** x for x in range (0 , 6 + 1 )]},
231- cv = 3 , n_iter = 10 ))]),
232- GridSearchCV (BaggingClassifier (base_estimator = SVC ()),
233- {"base_estimator__C" : [0.01 , 0.1 , 10 ],
234- "base_estimator__gamma" : [0.01 , 0.1 , 10 ]}),
235- RandomizedSearchCV (RandomForestClassifier (n_estimators = 5 ),
236- {"max_depth" : [3 , None ],
237- "max_features" : [1 , 2 , 3 , 4 ],
238- "min_samples_split" : [2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
239- "min_samples_leaf" : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
240- "bootstrap" : [True , False ],
241- "criterion" : ["gini" , "entropy" ]},
242- cv = StratifiedKFold (n_splits = 2 ,
243- random_state = 1 ),
244- n_iter = num_iterations )]
245-
217+ clfs = []
218+ random_state_values = []
219+
220+ lr = LogisticRegression ()
221+ clfs .append (lr )
222+ random_state_values .append ('62501' )
223+
224+ pipeline1 = Pipeline (steps = [('scaler' , StandardScaler (with_mean = False )),
225+ ('dummy' , DummyClassifier (strategy = 'prior' ))])
226+ clfs .append (pipeline1 )
227+ random_state_values .append ('62501' )
228+
229+ pipeline2 = Pipeline (steps = [('Imputer' , Imputer (strategy = 'median' )),
230+ ('VarianceThreshold' , VarianceThreshold ()),
231+ ('Estimator' , RandomizedSearchCV (
232+ DecisionTreeClassifier (),
233+ {'min_samples_split' : [2 ** x for x in range (1 , 7 + 1 )],
234+ 'min_samples_leaf' : [2 ** x for x in range (0 , 6 + 1 )]},
235+ cv = 3 , n_iter = 10 ))])
236+ clfs .append (pipeline2 )
237+ random_state_values .append ('62501' )
238+
239+ gridsearch = GridSearchCV (BaggingClassifier (base_estimator = SVC ()),
240+ {"base_estimator__C" : [0.01 , 0.1 , 10 ],
241+ "base_estimator__gamma" : [0.01 , 0.1 , 10 ]})
242+ clfs .append (gridsearch )
243+ random_state_values .append ('62501' )
244+
245+ randomsearch = RandomizedSearchCV (
246+ RandomForestClassifier (n_estimators = 5 ),
247+ {"max_depth" : [3 , None ],
248+ "max_features" : [1 , 2 , 3 , 4 ],
249+ "min_samples_split" : [2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
250+ "min_samples_leaf" : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
251+ "bootstrap" : [True , False ],
252+ "criterion" : ["gini" , "entropy" ]},
253+ cv = StratifiedKFold (n_splits = 2 , random_state = 1 ),
254+ n_iter = num_iterations )
255+
256+ clfs .append (randomsearch )
246257 # The random state of the RandomizedSearchCV is set after the
247258 # random state of the RandomForestClassifier is set; therefore,
248259 # it has a different value than in the previous examples
249- random_state_values = [ '62501' ] * ( len ( clfs ) - 1 ) + [ '33003' ]
260+ random_state_values . append ( '33003' )
250261
251262 for clf , rsv in zip (clfs , random_state_values ):
252263 run = self ._perform_run (task_id , num_test_instances , clf ,
@@ -333,12 +344,11 @@ def test__run_exists(self):
333344 # and can just check their status online
334345 clfs = [sklearn .pipeline .Pipeline (steps = [('Imputer' , Imputer (strategy = 'mean' )),
335346 ('VarianceThreshold' , VarianceThreshold (threshold = 0.05 )),
336- ('Estimator' , GaussianNB ( ))]),
347+ ('Estimator' , DecisionTreeClassifier ( max_depth = 4 ))]),
337348 sklearn .pipeline .Pipeline (steps = [('Imputer' , Imputer (strategy = 'most_frequent' )),
338349 ('VarianceThreshold' , VarianceThreshold (threshold = 0.1 )),
339350 ('Estimator' , DecisionTreeClassifier (max_depth = 4 ))])]
340351
341-
342352 task = openml .tasks .get_task (115 )
343353
344354 for clf in clfs :
@@ -347,18 +357,18 @@ def test__run_exists(self):
347357 # skip run if it was already performed.
348358 run = openml .runs .run_model_on_task (task , clf , avoid_duplicate_runs = True )
349359 run .publish ()
350- except openml .exceptions .PyOpenMLError :
360+ except openml .exceptions .PyOpenMLError as e :
351361 # run already existed. Great.
352362 pass
353363
354364 flow = openml .flows .sklearn_to_flow (clf )
355365 flow_exists = openml .flows .flow_exists (flow .name , flow .external_version )
356- self .assertIsInstance (flow_exists , int )
366+ self .assertGreater (flow_exists , 0 )
357367 downloaded_flow = openml .flows .get_flow (flow_exists )
358- setup_exists = openml .setups .setup_exists (downloaded_flow )
359- self .assertIsInstance (setup_exists , int )
368+ setup_exists = openml .setups .setup_exists (downloaded_flow , clf )
369+ self .assertGreater (setup_exists , 0 )
360370 run_ids = _run_exists (task .task_id , setup_exists )
361- self .assertGreater ( len (run_ids ), 0 )
371+ self .assertTrue ( run_ids , msg = (run_ids , clf ) )
362372
363373 def test__get_seeded_model (self ):
364374 # randomized models that are initialized without seeds, can be seeded
0 commit comments