Skip to content

Commit 45c7bc8

Browse files
committed
FIX feature union with switched names
1 parent dfff969 commit 45c7bc8

3 files changed

Lines changed: 36 additions & 13 deletions

File tree

openml/flows/sklearn_converter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ def _serialize_model(model):
200200
# parameters, which will be replaced by the real component
201201
# when deserializing the parameter
202202
sub_component_identifier = k + '__' + identifier
203+
sub_components_explicit.add(sub_component_identifier)
203204
sub_components[sub_component_identifier] = sub_component
204205
component_reference = OrderedDict()
205206
component_reference['oml-python:serialized_object'] = 'component_reference'

tests/flows/test_flow.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -199,12 +199,12 @@ def test_sklearn_to_upload_to_flow(self):
199199

200200
fixture_name = 'sklearn.model_selection._search.RandomizedSearchCV(' \
201201
'estimator=sklearn.pipeline.Pipeline(' \
202-
'sklearn.preprocessing.data.OneHotEncoder,' \
203-
'sklearn.preprocessing.data.StandardScaler,' \
204-
'sklearn.pipeline.FeatureUnion(' \
205-
'sklearn.decomposition.truncated_svd.TruncatedSVD,' \
206-
'sklearn.feature_selection.univariate_selection.SelectPercentile),' \
207-
'sklearn.ensemble.weight_boosting.AdaBoostClassifier(' \
202+
'steps__ohe=sklearn.preprocessing.data.OneHotEncoder,' \
203+
'steps__scaler=sklearn.preprocessing.data.StandardScaler,' \
204+
'steps__fu=sklearn.pipeline.FeatureUnion(' \
205+
'transformer_list__pca=sklearn.decomposition.truncated_svd.TruncatedSVD,' \
206+
'transformer_list__fs=sklearn.feature_selection.univariate_selection.SelectPercentile),' \
207+
'steps__boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(' \
208208
'base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))'
209209

210210
self.assertEqual(new_flow.name, fixture_name)

tests/flows/test_sklearn.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
from openml.flows import OpenMLFlow, sklearn_to_flow, flow_to_sklearn
2323

2424

25+
__version__ = 0.1
26+
27+
2528
class Model(sklearn.base.BaseEstimator):
2629
def __init__(self, boolean, integer, floating_point_value):
2730
self.boolean = boolean
@@ -120,7 +123,8 @@ def test_serialize_pipeline(self):
120123
('scaler', scaler), ('dummy', dummy)))
121124

122125
fixture_name = 'sklearn.pipeline.Pipeline(' \
123-
'sklearn.preprocessing.data.StandardScaler,sklearn.dummy.DummyClassifier)'
126+
'steps__scaler=sklearn.preprocessing.data.StandardScaler,' \
127+
'steps__dummy=sklearn.dummy.DummyClassifier)'
124128
fixture_description = 'Automatically created sub-component.'
125129

126130
serialization = sklearn_to_flow(model)
@@ -178,8 +182,8 @@ def test_serialize_feature_union(self):
178182
serialization = sklearn_to_flow(fu)
179183
self.assertEqual(serialization.name,
180184
'sklearn.pipeline.FeatureUnion('
181-
'sklearn.preprocessing.data.OneHotEncoder,'
182-
'sklearn.preprocessing.data.StandardScaler)')
185+
'transformer_list__ohe=sklearn.preprocessing.data.OneHotEncoder,'
186+
'transformer_list__scaler=sklearn.preprocessing.data.StandardScaler)')
183187
new_model = flow_to_sklearn(serialization)
184188

185189
self.assertEqual(type(new_model), type(fu))
@@ -214,12 +218,30 @@ def test_serialize_feature_union(self):
214218
serialization = sklearn_to_flow(fu)
215219
self.assertEqual(serialization.name,
216220
'sklearn.pipeline.FeatureUnion('
217-
'sklearn.preprocessing.data.OneHotEncoder)')
221+
'transformer_list__ohe=sklearn.preprocessing.data.OneHotEncoder)')
218222
new_model = flow_to_sklearn(serialization)
219223
self.assertEqual(type(new_model), type(fu))
220224
self.assertIsNot(new_model, fu)
221225
self.assertIs(new_model.transformer_list[1][1], None)
222226

227+
def test_serialize_feature_union_switched_names(self):
228+
ohe = sklearn.preprocessing.OneHotEncoder()
229+
scaler = sklearn.preprocessing.StandardScaler()
230+
fu1 = sklearn.pipeline.FeatureUnion(transformer_list=[('ohe', ohe), ('scaler', scaler)])
231+
fu2 = sklearn.pipeline.FeatureUnion(transformer_list=[('scaler', ohe), ('ohe', scaler)])
232+
fu1_serialization = sklearn_to_flow(fu1)
233+
fu2_serialization = sklearn_to_flow(fu2)
234+
self.assertEqual(
235+
fu1_serialization.name,
236+
"sklearn.pipeline.FeatureUnion("
237+
"transformer_list__ohe=sklearn.preprocessing.data.OneHotEncoder,"
238+
"transformer_list__scaler=sklearn.preprocessing.data.StandardScaler)")
239+
self.assertEqual(
240+
fu2_serialization.name,
241+
"sklearn.pipeline.FeatureUnion("
242+
"transformer_list__scaler=sklearn.preprocessing.data.OneHotEncoder,"
243+
"transformer_list__ohe=sklearn.preprocessing.data.StandardScaler)")
244+
223245
def test_serialize_complex_flow(self):
224246
ohe = sklearn.preprocessing.OneHotEncoder(categorical_features=[0])
225247
scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
@@ -238,9 +260,9 @@ def test_serialize_complex_flow(self):
238260

239261
fixture_name = 'sklearn.model_selection._search.RandomizedSearchCV(' \
240262
'estimator=sklearn.pipeline.Pipeline(' \
241-
'sklearn.preprocessing.data.OneHotEncoder,' \
242-
'sklearn.preprocessing.data.StandardScaler,' \
243-
'sklearn.ensemble.weight_boosting.AdaBoostClassifier(' \
263+
'steps__ohe=sklearn.preprocessing.data.OneHotEncoder,' \
264+
'steps__scaler=sklearn.preprocessing.data.StandardScaler,' \
265+
'steps__boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(' \
244266
'base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))'
245267
self.assertEqual(serialized.name, fixture_name)
246268

0 commit comments

Comments
 (0)