1- from sklearn .linear_model import LogisticRegression , SGDClassifier
1+ import sys
2+
3+ from sklearn .linear_model import LogisticRegression , SGDClassifier , LinearRegression
24from sklearn .ensemble import RandomForestClassifier , BaggingClassifier
35from sklearn .svm import SVC
4- from sklearn .model_selection import RandomizedSearchCV , GridSearchCV
6+ from sklearn .model_selection import RandomizedSearchCV , GridSearchCV , StratifiedKFold
57import openml
68import openml .exceptions
79from openml .testing import TestBase
810
11+ if sys .version_info [0 ] >= 3 :
12+ from unittest import mock
13+ else :
14+ import mock
15+
916
1017class TestRun (TestBase ):
1118
@@ -20,6 +27,23 @@ def _perform_run(self, task_id, num_instances, clf):
2027 self .assertEqual (len (run .data_content ), num_instances )
2128 return run
2229
def test_run_regression_on_classif_task(self):
    """Expect ``run_task`` to raise AttributeError for a LinearRegression model.

    Task 10107 is fetched from OpenML and handed to ``openml.runs.run_task``
    together with a regressor; the call must fail with ``AttributeError``.
    """
    regressor = LinearRegression()
    fetched_task = openml.tasks.get_task(10107)

    # Resolve the callable before entering the context manager so that only
    # errors raised by the call itself are counted by the assertion.
    run_task = openml.runs.run_task
    with self.assertRaises(AttributeError):
        run_task(task=fetched_task, model=regressor)
36+
@mock.patch('openml.flows.sklearn_to_flow')
def test_check_erronous_sklearn_flow_fails(self, sklearn_to_flow_mock):
    """Check that ``run_task`` raises ValueError for an invalid ``C`` value.

    ``openml.flows.sklearn_to_flow`` is patched for the duration of the test
    and the resulting mock is injected as ``sklearn_to_flow_mock``.
    """
    task_id = 10107
    task = openml.tasks.get_task(task_id)

    # Invalid parameter values
    clf = LogisticRegression(C='abc')
    # NOTE(review): this assertion is evaluated before run_task is invoked,
    # so it only confirms that the patched function has not been called yet.
    # Confirm whether it was meant to follow the call below.
    self.assertEqual(sklearn_to_flow_mock.call_count, 0)
    # The pattern is a raw string because it escapes literal parentheses;
    # in a normal string "\(" is an invalid escape sequence.
    self.assertRaisesRegexp(
        ValueError,
        r"Penalty term must be positive; got \(C='abc'\)",
        openml.runs.run_task, task=task, model=clf)
2347
2448 def test_run_iris (self ):
2549 task_id = 10107
@@ -28,34 +52,35 @@ def test_run_iris(self):
2852 clf = LogisticRegression ()
2953 self ._perform_run (task_id ,num_instances , clf )
3054
31-
def test_run_optimize_randomforest_iris(self):
    """Run a randomized hyperparameter search over a random forest.

    The search is executed through ``_perform_run`` on task 10107 and the
    resulting trace must contain ``num_iterations * num_folds`` entries.
    """
    task_id = 10107
    num_instances = 150
    # NOTE(review): presumably the fold count of the task itself, which is
    # separate from the 3 splits used inside the search -- confirm.
    num_folds = 10
    num_iterations = 5

    search_space = {
        "max_depth": [3, None],
        "max_features": [1, 2, 3, 4],
        "min_samples_split": list(range(2, 11)),
        "min_samples_leaf": list(range(1, 11)),
        "bootstrap": [True, False],
        "criterion": ["gini", "entropy"],
    }
    forest = RandomForestClassifier(n_estimators=5)
    inner_cv = StratifiedKFold(n_splits=3)
    random_search = RandomizedSearchCV(
        forest,
        search_space,
        cv=inner_cv,
        n_iter=num_iterations,
    )

    run = self._perform_run(task_id, num_instances, random_search)
    expected_trace_length = num_iterations * num_folds
    self.assertEqual(len(run.trace_content), expected_trace_length)
4974
5075 def test_run_optimize_bagging_iris (self ):
5176 task_id = 10107
5277 num_instances = 150
5378 num_folds = 10
54- num_iterations = 36 # (num values for C times gamma)
79+ num_iterations = 9 # (num values for C times gamma)
5580
5681 bag = BaggingClassifier (base_estimator = SVC ())
57- param_dist = {"base_estimator__C" : [0.001 , 0. 01 , 0.1 , 1 , 10 , 100 ],
58- "base_estimator__gamma" : [0.001 , 0. 01 , 0.1 , 1 , 10 , 100 ]}
82+ param_dist = {"base_estimator__C" : [0.01 , 0.1 , 10 ],
83+ "base_estimator__gamma" : [0.01 , 0.1 , 10 ]}
5984 grid_search = GridSearchCV (bag , param_dist )
6085
6186 run = self ._perform_run (task_id , num_instances , grid_search )
0 commit comments