@@ -229,30 +229,58 @@ def test_get_run_trace(self):
229229 "criterion" : ["gini" , "entropy" ]},
230230 num_iterations , random_state = 42 )
231231
232- # [START] for speeding up this unit test!
233- flow = openml .flows .sklearn_to_flow (clf )
234- flow_exists = openml .flows .flow_exists (flow .name , flow .external_version )
235- if flow_exists :
236- flow = openml .flows .get_flow (flow_exists )
237- setup_exists = openml .setups .setup_exists (flow , clf )
238- if setup_exists :
239- # receives a set of runids. These should all be the same.
240- run_id = random .choice (list (_run_exists (task_id , setup_exists )))
241- # [END] speeding up unit test
242-
243- # ensure the run exists ...
244- if run_id is None :
245- print ("Run not executed yet .. running random search on Random Forest" )
246- # we can be strict about duplicate runs
232+ # [SPEED] make unit test faster by exploiting run information from the past
233+ try :
234+ # in case the run does not exist yet
247235 run = openml .runs .run_task (task , clf , avoid_duplicate_runs = True )
248236 run = run .publish ()
249237 self ._wait_for_processed_run (run .run_id , 80 )
250238 run_id = run .run_id
239+ except openml .exceptions .PyOpenMLError :
240+ # run was already executed; look up its id via flow, setup and task
241+ flow = openml .flows .sklearn_to_flow (clf )
242+ flow_exists = openml .flows .flow_exists (flow .name , flow .external_version )
243+ self .assertIsInstance (flow_exists , int )
244+ downloaded_flow = openml .flows .get_flow (flow_exists )
245+ setup_exists = openml .setups .setup_exists (downloaded_flow , clf )
246+ self .assertIsInstance (setup_exists , int )
247+ run_ids = _run_exists (task .task_id , setup_exists )
248+ run_id = random .choice (list (run_ids ))
251249
252250 # now the actual unit test ...
253251 run_trace = openml .runs .get_run_trace (run_id )
254252 self .assertEqual (len (run_trace .trace_iterations ), num_iterations * num_folds )
255253
def test__run_exists(self):
    """Check that ``_run_exists`` reports at least one run for known setups.

    For each of two fixed pipelines the test first tries to execute and
    publish a run on task 1, then resolves the flow id and setup id and
    asserts that ``_run_exists`` returns a non-empty collection of run ids
    for that (task, setup) pair.
    """
    # Original author's note: it would be better to not sentinel these
    # classifiers (TODO: confirm what was intended here).
    naive_bayes_pipeline = sklearn.pipeline.Pipeline(steps=[
        ('Imputer', Imputer(strategy='mean')),
        ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
        ('Estimator', GaussianNB()),
    ])
    decision_tree_pipeline = sklearn.pipeline.Pipeline(steps=[
        ('Imputer', Imputer(strategy='most_frequent')),
        ('VarianceThreshold', VarianceThreshold(threshold=0.1)),
        ('Estimator', DecisionTreeClassifier(max_depth=4)),
    ])
    task = openml.tasks.get_task(1)

    for model in (naive_bayes_pipeline, decision_tree_pipeline):
        # Make sure a run for this model is on the server. A
        # PyOpenMLError is treated as "the run already existed", which
        # is exactly the state the assertions below need.
        try:
            openml.runs.run_task(
                task, model, avoid_duplicate_runs=True).publish()
        except openml.exceptions.PyOpenMLError:
            pass

        # Resolve the flow id from the locally built flow description.
        local_flow = openml.flows.sklearn_to_flow(model)
        flow_id = openml.flows.flow_exists(
            local_flow.name, local_flow.external_version)
        self.assertIsInstance(flow_id, int)

        # The setup lookup is done against the flow as downloaded from
        # the server, not the locally built one.
        server_flow = openml.flows.get_flow(flow_id)
        setup_id = openml.setups.setup_exists(server_flow, model)
        self.assertIsInstance(setup_id, int)

        # At least one run must be registered for this task and setup.
        found_run_ids = _run_exists(task.task_id, setup_id)
        self.assertGreater(len(found_run_ids), 0)
283+
256284 def test_get_seeded_model (self ):
257285 # randomized models that are initialized without seeds, can be seeded
258286 randomized_clfs = [
@@ -285,7 +313,7 @@ def test_get_seeded_model(self):
285313
286314 # afterwards, param value is set
287315 for param in randstate_params :
288- self .assertTrue ( isinstance ( new_params [param ], int ) )
316+ self .assertIsInstance ( new_params [param ], int )
289317 self .assertIsNotNone (new_params [param ])
290318
291319 def test_get_seeded_model_raises (self ):
0 commit comments