Skip to content

Commit 67f8e19

Browse files
committed
add tags by default to flows generated by sklearn converter
1 parent 5428690 commit 67f8e19

4 files changed

Lines changed: 27 additions & 5 deletions

File tree

openml/flows/functions.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,15 @@ def assert_flows_equal(flow1, flow2, ignore_parameters_on_older_children=None,
208208
elif ignore_parameters:
209209
continue
210210

211-
if attr1 != attr2:
211+
if key == 'tags':
212+
if set(attr1) != set(attr2):
213+
raise ValueError(
214+
"Flow %s: values for attribute '%s' differ: "
215+
"'%s' vs '%s'." %
216+
(str(flow1.name), str(key), str(set(attr1)),
217+
str(set(attr2))))
218+
219+
elif attr1 != attr2:
212220
raise ValueError("Flow %s: values for attribute '%s' differ: "
213221
"'%s' vs '%s'." %
214222
(str(flow1.name), str(key), str(attr1), str(attr2)))

openml/flows/sklearn_converter.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,15 @@ def _serialize_model(model):
208208
parameters=parameters,
209209
parameters_meta_info=parameters_meta_info,
210210
external_version=external_version,
211-
tags=[],
211+
tags=['openml-python', 'sklearn', 'scikit-learn',
212+
'python',
213+
_format_external_version('sklearn',
214+
sklearn.__version__).replace('==', '_'),
215+
# TODO: add more tags based on the scikit-learn
216+
# module a flow is in? For example automatically
217+
# annotate a class of sklearn.svm.SVC() with the
218+
# tag svm?
219+
],
212220
language='English',
213221
# TODO fill in dependencies!
214222
dependencies=dependencies)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ nose
77
requests
88
scikit-learn>=0.18
99
nbformat
10+
python-dateutil

tests/test_flows/test_flow.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,14 @@ def test_sklearn_to_upload_to_flow(self):
276276
estimator=model, param_distributions=parameter_grid, cv=cv)
277277
rs.fit(X, y)
278278
flow = openml.flows.sklearn_to_flow(rs)
279-
flow.tags.extend(['openml-python', 'unittest'])
279+
# Tags may be sorted in any order (by the server). Removing all tags
280+
# makes sure that the xml comparison does not fail because of that.
281+
subflows = [flow]
282+
while len(subflows) > 0:
283+
f = subflows.pop()
284+
f.tags = []
285+
subflows.extend(list(f.components.values()))
286+
280287
flow, sentinel = self._add_sentinel_to_flow_name(flow, None)
281288

282289
flow.publish()
@@ -317,8 +324,6 @@ def test_sklearn_to_upload_to_flow(self):
317324
% sentinel
318325

319326
self.assertEqual(new_flow.name, fixture_name)
320-
self.assertTrue('openml-python' in new_flow.tags)
321-
self.assertTrue('unittest' in new_flow.tags)
322327
new_flow.model.fit(X, y)
323328

324329
def test_extract_tags(self):

0 commit comments

Comments
 (0)