|
19 | 19 | # Necessary to have signature available in python 2.7 |
20 | 20 | from sklearn.utils.fixes import signature |
21 | 21 |
|
22 | | -from .flow import OpenMLFlow |
| 22 | +from openml.flows import OpenMLFlow |
| 23 | +from openml.exceptions import PyOpenMLError |
23 | 24 |
|
24 | 25 |
|
25 | 26 | if sys.version_info >= (3, 5): |
|
32 | 33 | '^(?P<name>[\w\-]+)((?P<operation>==|>=|>)(?P<version>(\d+\.)?(\d+\.)?(\d+)))?$') |
33 | 34 |
|
34 | 35 |
|
35 | | -def sklearn_to_flow(o): |
| 36 | +def sklearn_to_flow(o, parent_model=None): |
| 37 | + # TODO: assert that `parent_model` can be None only on the first recursion level |
36 | 38 |
|
37 | 39 | if _is_estimator(o): |
| 40 | + # is the main model or a submodel |
38 | 41 | rval = _serialize_model(o) |
39 | 42 | elif isinstance(o, (list, tuple)): |
40 | | - rval = [sklearn_to_flow(element) for element in o] |
| 43 | + # TODO: explain what type of parameter is here |
| 44 | + rval = [sklearn_to_flow(element, parent_model) for element in o] |
41 | 45 | if isinstance(o, tuple): |
42 | 46 | rval = tuple(rval) |
43 | 47 | elif isinstance(o, (bool, int, float, six.string_types)) or o is None: |
| 48 | + # base parameter values |
44 | 49 | rval = o |
45 | 50 | elif isinstance(o, dict): |
| 51 | + # TODO: explain what type of parameter is here |
46 | 52 | rval = OrderedDict() |
47 | 53 | for key, value in o.items(): |
48 | 54 | if not isinstance(key, six.string_types): |
49 | 55 | raise TypeError('Can only use string as keys, you passed ' |
50 | 56 | 'type %s for value %s.' % |
51 | 57 | (type(key), str(key))) |
52 | | - key = sklearn_to_flow(key) |
53 | | - value = sklearn_to_flow(value) |
| 58 | + key = sklearn_to_flow(key, parent_model) |
| 59 | + value = sklearn_to_flow(value, parent_model) |
54 | 60 | rval[key] = value |
55 | 61 | rval = rval |
56 | 62 | elif isinstance(o, type): |
| 63 | + # TODO: explain what type of parameter is here |
57 | 64 | rval = serialize_type(o) |
58 | 65 | elif isinstance(o, scipy.stats.distributions.rv_frozen): |
59 | 66 | rval = serialize_rv_frozen(o) |
60 | 67 | # This only works for user-defined functions (not built-ins, and not even |
61 | 68 | # functools.partial objects). That is what we want here, as there shouldn't |
62 | 69 | # be any built-in functions or functools.partial objects in a pipeline. |
63 | 70 | elif inspect.isfunction(o): |
| 71 | + # TODO: explain what type of parameter is here |
64 | 72 | rval = serialize_function(o) |
65 | 73 | elif _is_cross_validator(o): |
| 74 | + # TODO: explain what type of parameter is here |
66 | 75 | rval = _serialize_cross_validator(o) |
67 | 76 | else: |
68 | 77 | raise TypeError(o, type(o)) |
@@ -256,18 +265,26 @@ def _extract_information_from_model(model): |
256 | 265 |
|
257 | 266 | model_parameters = model.get_params(deep=False) |
258 | 267 | for k, v in sorted(model_parameters.items(), key=lambda t: t[0]): |
259 | | - rval = sklearn_to_flow(v) |
| 268 | + rval = sklearn_to_flow(v, model) |
260 | 269 |
|
261 | 270 | if (isinstance(rval, (list, tuple)) and len(rval) > 0 and |
262 | 271 | isinstance(rval[0], (list, tuple)) and |
263 | 272 | [type(rval[0]) == type(rval[i]) for i in range(len(rval))]): |
264 | 273 |
|
265 | | - # Steps in a pipeline or feature union |
| 274 | + # Steps in a pipeline or feature union, or base classifiers in a voting classifier |
266 | 275 | parameter_value = list() |
| 276 | + reserved_keywords = set(model.get_params(deep=False).keys()) |
| 277 | + |
267 | 278 | for sub_component_tuple in rval: |
268 | 279 | identifier, sub_component = sub_component_tuple |
269 | 280 | sub_component_type = type(sub_component_tuple) |
270 | 281 |
|
| 282 | + if identifier in reserved_keywords: |
| 283 | + parent_model_name = model.__module__ + "." + \ |
| 284 | + model.__class__.__name__ |
| 285 | + raise PyOpenMLError('Found element shadowing official ' + \ |
| 286 | + 'parameter for %s: %s' % (parent_model_name, identifier)) |
| 287 | + |
271 | 288 | if sub_component is None: |
272 | 289 | # In a FeatureUnion it is legal to have a None step |
273 | 290 |
|
@@ -310,7 +327,7 @@ def _extract_information_from_model(model): |
310 | 327 | component_reference[ |
311 | 328 | 'oml-python:serialized_object'] = 'component_reference' |
312 | 329 | component_reference['value'] = OrderedDict(key=k, step_name=None) |
313 | | - component_reference = sklearn_to_flow(component_reference) |
| 330 | + component_reference = sklearn_to_flow(component_reference, model) |
314 | 331 | parameters[k] = json.dumps(component_reference) |
315 | 332 |
|
316 | 333 | else: |
|
0 commit comments