1+ import sys
2+
13from sklearn .linear_model import LogisticRegression , SGDClassifier , LinearRegression
24from sklearn .ensemble import RandomForestClassifier , BaggingClassifier
35from sklearn .svm import SVC
4- from sklearn .model_selection import RandomizedSearchCV , GridSearchCV
6+ from sklearn .model_selection import RandomizedSearchCV , GridSearchCV , StratifiedKFold
57import openml
68import openml .exceptions
79from openml .testing import TestBase
810
11+ if sys .version_info [0 ] >= 3 :
12+ from unittest import mock
13+ else :
14+ import mock
15+
916
1017class TestRun (TestBase ):
1118
@@ -27,28 +34,40 @@ def test_run_regression_on_classif_task(self):
2734 task = openml .tasks .get_task (task_id )
2835 self .assertRaises (AttributeError , openml .runs .run_task , task = task , model = clf )
2936
@mock.patch('openml.flows.sklearn_to_flow')
def test_check_erronous_sklearn_flow_fails(self, sklearn_to_flow_mock):
    """Check that ``run_task`` raises ``ValueError`` for an invalid model.

    ``openml.flows.sklearn_to_flow`` is replaced by a mock for the
    duration of the test. A ``LogisticRegression`` is built with a
    non-numeric ``C`` and handed to ``openml.runs.run_task``; the call is
    expected to raise a ``ValueError`` whose message matches the penalty
    term pattern below.
    """
    task_id = 10107
    task = openml.tasks.get_task(task_id)

    # Invalid parameter values
    clf = LogisticRegression(C='abc')
    # NOTE: this check runs before run_task is invoked, so it only
    # confirms the freshly patched mock has not been called yet.
    self.assertEqual(sklearn_to_flow_mock.call_count, 0)
    # Raw string: the pattern escapes the parentheses for the regex engine;
    # in a normal string literal "\(" is an invalid escape sequence.
    # assertRaisesRegexp (not assertRaisesRegex) is kept because this
    # module still supports Python 2.
    self.assertRaisesRegexp(
        ValueError,
        r"Penalty term must be positive; got \(C='abc'\)",
        openml.runs.run_task, task=task, model=clf)
47+
def test_run_iris(self):
    """Run a default ``LogisticRegression`` through ``_perform_run``.

    Uses task 10107 with an expected instance count of 150 (the iris
    task, going by the test name).
    """
    iris_task_id, expected_instances = 10107, 150
    self._perform_run(iris_task_id, expected_instances, LogisticRegression())
3654
37-
def test_run_optimize_randomforest_iris(self):
    """Run a randomized hyperparameter search over a random forest.

    A 5-tree ``RandomForestClassifier`` is wrapped in a
    ``RandomizedSearchCV`` that uses a 3-split ``StratifiedKFold`` and
    samples ``num_iterations`` candidates. The resulting run's trace is
    expected to hold ``num_iterations * num_folds`` entries.
    """
    task_id = 10107
    num_instances = 150
    num_folds = 10
    num_iterations = 5

    # Candidate values the randomized search samples from.
    search_space = {
        "max_depth": [3, None],
        "max_features": [1, 2, 3, 4],
        "min_samples_split": list(range(2, 11)),
        "min_samples_leaf": list(range(1, 11)),
        "bootstrap": [True, False],
        "criterion": ["gini", "entropy"],
    }
    random_search = RandomizedSearchCV(
        RandomForestClassifier(n_estimators=5),
        search_space,
        cv=StratifiedKFold(n_splits=3),
        n_iter=num_iterations
    )

    run = self._perform_run(task_id, num_instances, random_search)
    expected_trace_length = num_iterations * num_folds
    self.assertEqual(len(run.trace_content), expected_trace_length)
@@ -57,11 +76,11 @@ def test_run_optimize_bagging_iris(self):
5776 task_id = 10107
5877 num_instances = 150
5978 num_folds = 10
60- num_iterations = 16 # (num values for C times gamma)
79+ num_iterations = 9 # (num values for C times gamma)
6180
6281 bag = BaggingClassifier (base_estimator = SVC ())
63- param_dist = {"base_estimator__C" : [0.01 , 0.1 , 1 , 10 ],
64- "base_estimator__gamma" : [0.01 , 0.1 , 1 , 10 ]}
82+ param_dist = {"base_estimator__C" : [0.01 , 0.1 , 10 ],
83+ "base_estimator__gamma" : [0.01 , 0.1 , 10 ]}
6584 grid_search = GridSearchCV (bag , param_dist )
6685
6786 run = self ._perform_run (task_id , num_instances , grid_search )
0 commit comments