1010import unittest .mock
1111
1212import numpy as np
13+ import joblib
1314from joblib import parallel_backend
1415
1516import openml
@@ -1187,13 +1188,10 @@ def test__run_task_get_arffcontent(self):
11871188 num_folds = 10
11881189 num_repeats = 1
11891190
1190- flow = unittest .mock .Mock ()
1191- flow .name = "dummy"
11921191 clf = make_pipeline (
11931192 OneHotEncoder (handle_unknown = "ignore" ), SGDClassifier (loss = "log" , random_state = 1 )
11941193 )
11951194 res = openml .runs .functions ._run_task_get_arffcontent (
1196- flow = flow ,
11971195 extension = self .extension ,
11981196 model = clf ,
11991197 task = task ,
@@ -1404,8 +1402,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
14041402 # Check that _run_task_get_arffcontent works when one of the class
14051403 # labels is only declared in the arff file, but is not present in the
14061404 # actual data
1407- flow = unittest .mock .Mock ()
1408- flow .name = "dummy"
14091405 task = openml .tasks .get_task (2 ) # anneal; crossvalidation
14101406
14111407 from sklearn .compose import ColumnTransformer
@@ -1420,7 +1416,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
14201416 ) # build a sklearn classifier
14211417
14221418 data_content , _ , _ , _ = _run_task_get_arffcontent (
1423- flow = flow ,
14241419 model = model ,
14251420 task = task ,
14261421 extension = self .extension ,
@@ -1442,8 +1437,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
14421437 # Check that _run_task_get_arffcontent works when one of the class
14431438 # labels is only declared in the arff file, but is not present in the
14441439 # actual data
1445- flow = unittest .mock .Mock ()
1446- flow .name = "dummy"
14471440 task = openml .tasks .get_task (2 ) # anneal; crossvalidation
14481441 # task_id=2 on test server has 38 columns with 6 numeric columns
14491442 cont_idx = [3 , 4 , 8 , 32 , 33 , 34 ]
@@ -1465,7 +1458,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
14651458 ) # build a sklearn classifier
14661459
14671460 data_content , _ , _ , _ = _run_task_get_arffcontent (
1468- flow = flow ,
14691461 model = model ,
14701462 task = task ,
14711463 extension = self .extension ,
@@ -1581,20 +1573,18 @@ def test_format_prediction_task_regression(self):
15811573 LooseVersion (sklearn .__version__ ) < "0.21" ,
15821574 reason = "couldn't perform local tests successfully w/o bloating RAM" ,
15831575 )
1584- @unittest .mock .patch ("openml.extensions.sklearn.SklearnExtension._run_model_on_fold " )
1576+ @unittest .mock .patch ("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs " )
15851577 def test__run_task_get_arffcontent_2 (self , parallel_mock ):
15861578 """ Tests if a run executed in parallel is collated correctly. """
15871579 task = openml .tasks .get_task (7 ) # Supervised Classification on kr-vs-kp
15881580 x , y = task .get_X_and_y (dataset_format = "dataframe" )
15891581 num_instances = x .shape [0 ]
15901582 line_length = 6 + len (task .class_labels )
1591- flow = unittest .mock .Mock ()
1592- flow .name = "dummy"
15931583 clf = SGDClassifier (loss = "log" , random_state = 1 )
15941584 n_jobs = 2
1595- with parallel_backend ("loky" , n_jobs = n_jobs ):
1585+ backend = "loky" if LooseVersion (joblib .__version__ ) > "0.11" else "multiprocessing"
1586+ with parallel_backend (backend , n_jobs = n_jobs ):
15961587 res = openml .runs .functions ._run_task_get_arffcontent (
1597- flow = flow ,
15981588 extension = self .extension ,
15991589 model = clf ,
16001590 task = task ,
@@ -1606,6 +1596,9 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
16061596 # function _run_model_on_fold is being mocked out. However, for a new spawned worker, it
16071597 # is not and the mock call_count should remain 0 while the subsequent check of actual
16081598 # results should also hold, only on successful distribution of tasks to workers.
1599+ # _prevent_optimize_n_jobs() is executed within the _run_model_on_fold() block, so
1600+ # mocking it doesn't affect the rest of the pipeline, yet it adequately indicates
1601+ # whether _run_model_on_fold() is being called or not.
16091602 self .assertEqual (parallel_mock .call_count , 0 )
16101603 self .assertIsInstance (res [0 ], list )
16111604 self .assertEqual (len (res [0 ]), num_instances )
@@ -1638,13 +1631,12 @@ def test_joblib_backends(self, parallel_mock):
16381631 x , y = task .get_X_and_y (dataset_format = "dataframe" )
16391632 num_instances = x .shape [0 ]
16401633 line_length = 6 + len (task .class_labels )
1641- flow = unittest .mock .Mock ()
1642- flow .name = "dummy"
16431634
1635+ backend_choice = "loky" if LooseVersion (joblib .__version__ ) > "0.11" else "multiprocessing"
16441636 for n_jobs , backend , len_time_stats , call_count in [
1645- (1 , "loky" , 7 , 10 ),
1646- (2 , "loky" , 4 , 10 ),
1647- (- 1 , "loky" , 1 , 10 ),
1637+ (1 , backend_choice , 7 , 10 ),
1638+ (2 , backend_choice , 4 , 10 ),
1639+ (- 1 , backend_choice , 1 , 10 ),
16481640 (1 , "threading" , 7 , 20 ),
16491641 (- 1 , "threading" , 1 , 30 ),
16501642 (1 , "sequential" , 7 , 40 ),
@@ -1668,7 +1660,6 @@ def test_joblib_backends(self, parallel_mock):
16681660 )
16691661 with parallel_backend (backend , n_jobs = n_jobs ):
16701662 res = openml .runs .functions ._run_task_get_arffcontent (
1671- flow = flow ,
16721663 extension = self .extension ,
16731664 model = clf ,
16741665 task = task ,
0 commit comments