77import xmltodict
88
99import scipy .stats
10+ import sklearn .datasets
1011import sklearn .dummy
1112import sklearn .ensemble
1213import sklearn .model_selection
@@ -123,17 +124,22 @@ def test_publish_flow(self, name_mock):
123124
124125 @mock .patch .object (openml .OpenMLFlow , '_get_name' , autospec = True )
125126 def test_sklearn_to_upload_to_flow (self , name_mock ):
127+ iris = sklearn .datasets .load_iris ()
128+ X = iris .data
129+ y = iris .target
130+
126131 # Create a unique prefix for the flow. Necessary because the flow is
127132 # identified by its name and external version online. Having a unique
128133 # name allows us to publish the same flow in each test run
129134 md5 = hashlib .md5 ()
130135 md5 .update (str (time .time ()).encode ('utf-8' ))
131136 sentinel = md5 .hexdigest ()[:10 ]
137+ sentinel = 'TEST%s' % sentinel
132138 def side_effect (self ):
133139 if sentinel in self .name :
134140 return self .name
135141 else :
136- return 'TEST %s%s' % (sentinel , self .name )
142+ return '%s%s' % (sentinel , self .name )
137143 name_mock .side_effect = side_effect
138144
139145 # Test a more complicated flow
@@ -142,18 +148,32 @@ def side_effect(self):
142148 base_estimator = sklearn .tree .DecisionTreeClassifier ())
143149 model = sklearn .pipeline .Pipeline (steps = (
144150 ('scaler' , scaler ), ('boosting' , boosting )))
145- parameter_grid = {'n_estimators' : [1 , 5 , 10 , 100 ],
146- 'learning_rate' : scipy .stats .uniform (0.01 , 0.99 ),
147- 'base_estimator__max_depth' : scipy .stats .randint (1 , 10 )}
151+ parameter_grid = {'boosting__n_estimators' : [1 , 5 , 10 , 100 ],
152+ 'boosting__learning_rate' : scipy .stats .uniform (0.01 , 0.99 ),
153+ 'boosting__base_estimator__max_depth' : scipy .stats .randint (1 , 10 )}
154+ cv = sklearn .model_selection .StratifiedKFold (n_splits = 5 , shuffle = True )
148155 rs = sklearn .model_selection .RandomizedSearchCV (
149- estimator = model , param_distributions = parameter_grid )
156+ estimator = model , param_distributions = parameter_grid , cv = cv )
157+ rs .fit (X , y )
150158 flow = openml .flows .create_flow_from_model (rs , SklearnToFlowConverter ())
151159
152160 flow .publish ()
153161 self .assertIsInstance (flow .flow_id , int )
154162
155163 # Check whether we can load the flow again
156- new_flow = openml .flows .get_flow (flow_id = flow .flow_id )
164+ # Strip the sentinel from the name so that the object can be
165+ # reinstantiated
166+ def side_effect (self ):
167+ if sentinel in self .name :
168+ name = self .name .replace (sentinel , '' )
169+ return name
170+ else :
171+ return self .name
172+ name_mock .side_effect = side_effect
173+
174+ name_mock .side_effect = side_effect
175+ new_flow = openml .flows .get_flow (flow_id = flow .flow_id ,
176+ converter = SklearnToFlowConverter ())
157177
158178 local_xml = flow ._to_xml ()
159179 server_xml = new_flow ._to_xml ()
@@ -175,4 +195,14 @@ def side_effect(self):
175195
176196 self .assertEqual (new_flow , flow )
177197 self .assertIsNot (new_flow , flow )
198+ new_flow .model .fit (X , y )
199+
200+ fixture_name = 'sklearn.model_selection._search.RandomizedSearchCV(' \
201+ 'sklearn.model_selection._split.StratifiedKFold,' \
202+ 'sklearn.pipeline.Pipeline(' \
203+ 'sklearn.preprocessing.data.StandardScaler,' \
204+ 'sklearn.ensemble.weight_boosting.AdaBoostClassifier(' \
205+ 'sklearn.tree.tree.DecisionTreeClassifier)))'
206+
207+ self .assertEqual (new_flow ._get_name (), fixture_name )
178208
0 commit comments