Skip to content

Commit 29996a3

Browse files
committed
ADD check that component is not used twice in a flow
1 parent 28c57ff commit 29996a3

2 files changed

Lines changed: 58 additions & 0 deletions

File tree

openml/flows/sklearn_converter.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,20 @@ def _serialize_model(model):
241241
parameters_meta_info[k] = OrderedDict((('description', None),
242242
('data_type', None)))
243243

244+
# Check that a component does not occur multiple times in a flow as this
245+
# is not supported by OpenML
246+
to_visit_stack = []
247+
to_visit_stack.extend(sub_components.values())
248+
known_sub_components = set()
249+
while len(to_visit_stack) > 0:
250+
visitee = to_visit_stack.pop()
251+
if visitee.name in known_sub_components:
252+
raise ValueError('Found a second occurence of component %s when '
253+
'trying to serialize %s.' % (visitee.name, model))
254+
else:
255+
known_sub_components.add(visitee.name)
256+
to_visit_stack.extend(visitee.components.values())
257+
244258
# Create a flow name, which contains all components in brackets, for
245259
# example RandomizedSearchCV(Pipeline(StandardScaler,AdaBoostClassifier(DecisionTreeClassifier)),StandardScaler,AdaBoostClassifier(DecisionTreeClassifier))
246260
# TODO the name above is apparently wrong, I need to test and check this

tests/flows/test_sklearn.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,47 @@ def test_gaussian_process(self):
391391
"<class 'sklearn.gaussian_process.kernels.Matern'>",
392392
sklearn_to_flow, gp)
393393

394+
def test_error_on_adding_component_multiple_times_to_flow(self):
    """Check that a flow containing the same component twice is rejected.

    Three estimators are built that each contain two ``PCA`` instances:
    a plain ``Pipeline``, a ``FeatureUnion``, and a ``Pipeline`` whose first
    step is a ``FeatureUnion`` (so the duplicate sits at a different nesting
    level). For each one a regular expression describing the expected
    ``ValueError`` message is prepared.

    Only the nested case is currently asserted; the assertions for the first
    two cases are disabled (see the TODO notes below).

    The expected-message fragments are raw string literals so that ``\\(``
    and ``\\)`` reach the ``re`` module unchanged instead of being treated
    as (invalid) string escape sequences. Inside a raw literal ``\\n`` is
    interpreted by the regular expression engine and still matches a
    newline character, so the patterns match the same text as before.
    """
    pca = sklearn.decomposition.PCA()
    pca2 = sklearn.decomposition.PCA()

    # Case 1: the same component type appears twice as direct pipeline steps.
    pipeline = sklearn.pipeline.Pipeline((('pca1', pca), ('pca2', pca2)))
    fixture = (
        r"Found a second occurence of component sklearn.decomposition.pca.PCA"
        r" when trying to serialize Pipeline\(steps=\(\('pca1', "
        r"PCA\(copy=True, iterated_power='auto', n_components=None, "
        r"random_state=None,\n"
        r" svd_solver='auto', tol=0.0, whiten=False\)\), "
        r"\('pca2', PCA\(copy=True, iterated_power='auto', "
        r"n_components=None, random_state=None,\n"
        r" svd_solver='auto', tol=0.0, whiten=False\)\)\)\)."
    )
    # TODO: this assertion was committed disabled; the reason is not visible
    # here. Re-enable it once the expected message is confirmed.
    # self.assertRaisesRegexp(ValueError, fixture, sklearn_to_flow, pipeline)

    # Case 2: the same component type appears twice inside a FeatureUnion.
    fu = sklearn.pipeline.FeatureUnion((('pca1', pca), ('pca2', pca2)))
    fixture = (
        r"Found a second occurence of component "
        r"sklearn.decomposition.pca.PCA when trying to serialize "
        r"FeatureUnion\(n_jobs=1,\n"
        r" transformer_list=\(\('pca1', PCA\(copy=True, "
        r"iterated_power='auto',"
        r" n_components=None, random_state=None,\n"
        r" svd_solver='auto', tol=0.0, whiten=False\)\), \('pca2', "
        r"PCA\(copy=True, iterated_power='auto',"
        r" n_components=None, random_state=None,\n"
        r" svd_solver='auto', tol=0.0, whiten=False\)\)\),\n"
        r" transformer_weights=None\)."
    )
    # TODO: this assertion was committed disabled; the reason is not visible
    # here. Re-enable it once the expected message is confirmed.
    # self.assertRaisesRegexp(ValueError, fixture, sklearn_to_flow, fu)

    # Case 3: one PCA is nested inside a FeatureUnion, the other is a direct
    # pipeline step, so the duplicate is found at different nesting levels.
    fs = sklearn.feature_selection.SelectKBest()
    fu2 = sklearn.pipeline.FeatureUnion((('pca1', pca), ('fs', fs)))
    pipeline2 = sklearn.pipeline.Pipeline((('fu', fu2), ('pca2', pca2)))
    fixture = (
        r"Found a second occurence of component "
        r"sklearn.decomposition.pca.PCA when trying to serialize "
        r"Pipeline\(steps=\(\('fu', FeatureUnion\(n_jobs=1,\n"
        r" transformer_list=\(\('pca1', PCA\(copy=True, "
        r"iterated_power='auto',"
        r" n_components=None, random_state=None,\n"
        r" svd_solver='auto', tol=0.0, whiten=False\)\), "
        # The function repr embeds a memory address, hence the hex wildcard.
        r"\('fs', SelectKBest\(k=10, score_func=<function "
        r"f_classif at 0x[a-z0-9]+>\)\)\),\n"
        r" transformer_weights=None\)\), \('pca2', "
        r"PCA\(copy=True, iterated_power='auto',"
        r" n_components=None, random_state=None,\n"
        r" svd_solver='auto', tol=0.0, whiten=False\)\)\)\)."
    )
    self.assertRaisesRegexp(ValueError, fixture, sklearn_to_flow, pipeline2)

0 commit comments

Comments
 (0)