changes requested by @mfeurer

janvanrijn · janvanrijn · commit 7d8fa4d2c252 · 2017-04-25T16:10:13.000+02:00
diff --git a/openml/flows/__init__.py b/openml/flows/__init__.py
@@ -1,6 +1,6 @@
 from .flow import OpenMLFlow
-from .sklearn_converter import sklearn_to_flow, flow_to_sklearn,  model_single_core
+from .sklearn_converter import sklearn_to_flow, flow_to_sklearn, _check_n_jobs
 from .functions import get_flow, list_flows, flow_exists
 
 __all__ = ['OpenMLFlow', 'create_flow_from_model', 'get_flow', 'list_flows',
-           'sklearn_to_flow', 'flow_to_sklearn', 'flow_exists', 'model_is_paralizable']
+           'sklearn_to_flow', 'flow_to_sklearn', 'flow_exists']
diff --git a/openml/flows/sklearn_converter.py b/openml/flows/sklearn_converter.py
@@ -536,7 +536,7 @@ def _serialize_cross_validator(o):
 
     return ret
 
-def model_single_core(model):
+def _check_n_jobs(model):
     '''
     Returns True if the parameter settings of model are chosen s.t. the model
      will run on a single core (in that case, openml-python can measure runtimes)
@@ -556,8 +556,7 @@ def check(param_dict, disallow_parameter=False):
             isinstance(model, sklearn.model_selection._search.BaseSearchCV)):
         raise ValueError('model should be BaseEstimator or BaseSearchCV')
 
-    # check if the njobs is not in the optimization trace
-    # this would be error by the user, so we can throw it as a courtesy
+    # make sure that n_jobs is not in the parameter grid of optimization procedure
     if isinstance(model, sklearn.model_selection._search.BaseSearchCV):
         param_distributions = None
         if isinstance(model, sklearn.model_selection.GridSearchCV):
@@ -569,16 +568,12 @@ def check(param_dict, disallow_parameter=False):
                   '{GridSearchCV, RandomizedSearchCV}. Should implement param check. ')
             pass
 
-
         if not check(param_distributions, True):
             raise PyOpenMLError('openml-python should not be used to '
                                 'optimize the n_jobs parameter.')
 
     # check the parameters for n_jobs
-    if check(model.get_params(), False) == False:
-        return False
-
-    return True
+    return check(model.get_params(), False)
 
 def _deserialize_cross_validator(value, **kwargs):
     model_name = value['name']
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -10,7 +10,7 @@
 
 from ..exceptions import PyOpenMLError
 from .. import config
-from ..flows import sklearn_to_flow, get_flow, flow_exists, model_single_core
+from ..flows import sklearn_to_flow, get_flow, flow_exists, _check_n_jobs
 from ..setups import setup_exists
 from ..exceptions import OpenMLCacheException, OpenMLServerException
 from ..util import URLError, version_complies
@@ -160,7 +160,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
     user_defined_measures = defaultdict(lambda: defaultdict(dict))
 
     rep_no = 0
-    can_measure_runtime = version_complies(3, 3) and model_single_core(model)
+    can_measure_runtime = version_complies(3, 3) and _check_n_jobs(model)
     # TODO use different iterator to only provide a single iterator (less
     # methods, less maintenance, less confusion)
     for rep in task.iterate_repeats():
diff --git a/tests/test_flows/test_sklearn.py b/tests/test_flows/test_sklearn.py
@@ -26,7 +26,7 @@
 
 from openml.flows import OpenMLFlow, sklearn_to_flow, flow_to_sklearn
 from openml.flows.sklearn_converter import _format_external_version, \
-    _check_dependencies, model_single_core
+    _check_dependencies, _check_n_jobs
 from openml.exceptions import PyOpenMLError
 
 this_directory = os.path.dirname(os.path.abspath(__file__))
@@ -558,9 +558,13 @@ def test_illegal_parameter_names_featureunion(self):
         self.assertRaises(ValueError, sklearn.pipeline.FeatureUnion, transformer_list=transformer_list)
 
     def test_paralizable_check(self):
+        # using this model should pass the test (if param distribution is legal)
         singlecore_bagging = sklearn.ensemble.BaggingClassifier()
+        # using this model should return false (if param distribution is legal)
         multicore_bagging = sklearn.ensemble.BaggingClassifier(n_jobs=5)
+        # using this param distribution should raise an exception
         illegal_param_dist = {"base__n_jobs": [-1, 0, 1] }
+        # using this param distribution should not raise an exception
         legal_param_dist = {"base__max_depth": [2, 3, 4]}
 
         legal_models = [
@@ -581,7 +585,7 @@ def test_paralizable_check(self):
         answers = [True, False, False, True, False, False, True, False]
 
         for i in range(len(legal_models)):
-            self.assertTrue(model_single_core(legal_models[i]) == answers[i])
+            self.assertTrue(_check_n_jobs(legal_models[i]) == answers[i])
 
         for i in range(len(illegal_models)):
-            self.assertRaises(PyOpenMLError, model_single_core, illegal_models[i])
+            self.assertRaises(PyOpenMLError, _check_n_jobs, illegal_models[i])