Skip to content

Commit 04d5677

Browse files
committed
extended unit test cases
1 parent 8325c72 commit 04d5677

1 file changed

Lines changed: 44 additions & 16 deletions

File tree

tests/test_runs/test_run_functions.py

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -229,30 +229,58 @@ def test_get_run_trace(self):
229229
"criterion": ["gini", "entropy"]},
230230
num_iterations, random_state=42)
231231

232-
# [START] for speeding up this unit test!
233-
flow = openml.flows.sklearn_to_flow(clf)
234-
flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
235-
if flow_exists:
236-
flow = openml.flows.get_flow(flow_exists)
237-
setup_exists = openml.setups.setup_exists(flow, clf)
238-
if setup_exists:
239-
# receives a set of runids. These should all be the same.
240-
run_id = random.choice(list(_run_exists(task_id, setup_exists)))
241-
# [END] speeding up unit test
242-
243-
# ensure the run exists ...
244-
if run_id is None:
245-
print("Run not executed yet .. running random search on Random Forest")
246-
# we can be strict about duplicate runs
232+
# [SPEED] make unit test faster by exploiting run information from the past
233+
try:
234+
# in case the run does not exist yet
247235
run = openml.runs.run_task(task, clf, avoid_duplicate_runs=True)
248236
run = run.publish()
249237
self._wait_for_processed_run(run.run_id, 80)
250238
run_id = run.run_id
239+
except openml.exceptions.PyOpenMLError:
240+
# run was already performed
241+
flow = openml.flows.sklearn_to_flow(clf)
242+
flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
243+
self.assertIsInstance(flow_exists, int)
244+
downloaded_flow = openml.flows.get_flow(flow_exists)
245+
setup_exists = openml.setups.setup_exists(downloaded_flow, clf)
246+
self.assertIsInstance(setup_exists, int)
247+
run_ids = _run_exists(task.task_id, setup_exists)
248+
run_id = random.choice(list(run_ids))
251249

252250
# now the actual unit test ...
253251
run_trace = openml.runs.get_run_trace(run_id)
254252
self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds)
255253

254+
def test__run_exists(self):
255+
# would be better to not sentinel these clfs ..
256+
clfs = [sklearn.pipeline.Pipeline(steps=[('Imputer', Imputer(strategy='mean')),
257+
('VarianceThreshold', VarianceThreshold(threshold=0.05)),
258+
('Estimator', GaussianNB())]),
259+
sklearn.pipeline.Pipeline(steps=[('Imputer', Imputer(strategy='most_frequent')),
260+
('VarianceThreshold', VarianceThreshold(threshold=0.1)),
261+
('Estimator', DecisionTreeClassifier(max_depth=4))])]
262+
task = openml.tasks.get_task(1)
263+
264+
for clf in clfs:
265+
try:
266+
# first populate the server with this run.
267+
# skip run if it was already performed.
268+
run = openml.runs.run_task(task, clf, avoid_duplicate_runs=True)
269+
run.publish()
270+
except openml.exceptions.PyOpenMLError:
271+
# run already existed. Great.
272+
pass
273+
274+
flow = openml.flows.sklearn_to_flow(clf)
275+
flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
276+
self.assertIsInstance(flow_exists, int)
277+
downloaded_flow = openml.flows.get_flow(flow_exists)
278+
setup_exists = openml.setups.setup_exists(downloaded_flow, clf)
279+
self.assertIsInstance(setup_exists, int)
280+
run_ids = _run_exists(task.task_id, setup_exists)
281+
self.assertGreater(len(run_ids), 0)
282+
283+
256284
def test_get_seeded_model(self):
257285
# randomized models that are initialized without seeds, can be seeded
258286
randomized_clfs = [
@@ -285,7 +313,7 @@ def test_get_seeded_model(self):
285313

286314
# afterwards, param value is set
287315
for param in randstate_params:
288-
self.assertTrue(isinstance(new_params[param], int))
316+
self.assertIsInstance(new_params[param], int)
289317
self.assertIsNotNone(new_params[param])
290318

291319
def test_get_seeded_model_raises(self):

0 commit comments

Comments
 (0)