1111class OpenMLFlow (object ):
1212 """OpenML Flow. Stores machine learning models.
1313
14+ Flows should not be generated manually, but by the function
15+ :meth:`openml.flows.create_flow_from_model`. Using this helper function
16+ ensures that all relevant fields are filled in.
17+
1418 Parameters
1519 ----------
16- model : scikit-learn compatible model
17- The model the flow consists of. The model needs to have fit and predict methods.
18- description : string
20+ name : str
21+ Name of the flow. Is used together with the attribute `external_version`
22+ as a unique identifier of the flow.
23+ description : str
1924 Description of the flow (free text).
20- contributor : string
21- FIXME
22- tag : string
23- FIXME
25+ model : object
26+ ML model which is described by this flow.
27+ components : OrderedDict
28+ Mapping from component identifier to an OpenMLFlow object.
29+ parameters : OrderedDict
30+ Mapping from parameter name to the parameter default value. The
31+ parameter default value must be of type `str`, so that the respective
32+ toolbox plugin can take care of casting the parameter default value to
33+ the correct type.
34+ parameters_meta_info : OrderedDict
35+ Mapping from parameter name to `dict`. Stores additional information for
36+ each parameter. Required keys are `data_type` and `description`.
37+ external_version : str
38+ Version number of the software the flow is implemented in. Is used
39+ together with the attribute `name` as a unique identifier of the flow.
40+ uploader : str
41+ OpenML user ID of the uploader. Filled in by the server.
42+ tags : list
43+ List of tags. Created on the server by other API calls.
44+ binary_url : str
45+ ??? - don't use - implemented because it is used by flows on the server
46+ binary_format : str
47+ ??? - don't use - implemented because it is used by flows on the server
48+ binary_md5 : str
49+ ??? - don't use - implemented because it is used by flows on the server
50+ version : str
51+ OpenML version of the flow.
52+ upload_date : str
53+ Date the flow was uploaded. Filled in by the server.
54+ language : str
55+ Natural language the flow is described in (not the programming
56+ language).
57+ dependencies : str
58+ A list of dependencies necessary to run the flow.
2459 flow_id : int, optional
2560 Flow ID. Assigned by the server (fixme shouldn't be here?)
26- uploader : string, optional
27- User uploading the model (fixme shouldn't be here?). Assigned by the server.
28-
29-
3061 """
62+ # TODO @Jan can you find better descriptions for binary_url, binary_md5,
63+ # binary_format and version?
3164 def __init__ (self , name , description = None , model = None , components = None ,
3265 parameters = None , parameters_meta_info = None ,
3366 external_version = None , uploader = None , tags = None ,
@@ -41,29 +74,34 @@ def __init__(self, name, description=None, model=None, components=None,
4174 if components is None :
4275 components = OrderedDict ()
4376 elif not isinstance (components , OrderedDict ):
44- raise TypeError ('components must be of type OrderedDict, but is %s.' %
45- type (components ))
77+ raise TypeError ('components must be of type OrderedDict, '
78+ 'but is %s.' % type (components ))
4679 self .components = components
80+
4781 if parameters is None :
4882 parameters = OrderedDict ()
4983 elif not isinstance (parameters , OrderedDict ):
50- raise TypeError ('parameters must be of type OrderedDict, but is %s.' %
51- type (parameters ))
84+ raise TypeError ('parameters must be of type OrderedDict, '
85+ 'but is %s.' % type (parameters ))
86+
5287 if parameters_meta_info is None :
5388 parameters_meta_info = OrderedDict ()
5489 elif not isinstance (parameters_meta_info , OrderedDict ):
55- raise TypeError ('parameters_meta_info must be of type OrderedDict, but is %s.' %
56- type (parameters_meta_info ))
90+ raise TypeError ('parameters_meta_info must be of type OrderedDict, '
91+ 'but is %s.' % type (parameters_meta_info ))
92+
5793 keys_parameters = set (parameters .keys ())
5894 keys_parameters_meta_info = set (parameters_meta_info .keys ())
5995 if len (keys_parameters .difference (keys_parameters_meta_info )) > 0 :
6096 raise ValueError ('Parameter %s only in parameters, but not in'
6197 'parameters_meta_info.' %
62- str (keys_parameters .difference (keys_parameters_meta_info )))
98+ str (keys_parameters .difference (
99+ keys_parameters_meta_info )))
63100 if len (keys_parameters_meta_info .difference (keys_parameters )) > 0 :
64- raise ValueError ('Parameter %s only in parameters_meta_info, but not in'
65- 'parameters.' %
66- str (keys_parameters_meta_info .difference (keys_parameters )))
101+ raise ValueError ('Parameter %s only in parameters_meta_info, '
102+ 'but not in parameters.' %
103+ str (keys_parameters_meta_info .difference (
104+ keys_parameters )))
67105
68106 self .parameters = parameters
69107 self .parameters_meta_info = parameters_meta_info
@@ -88,7 +126,7 @@ def _to_xml(self):
88126
89127 Returns
90128 -------
91- flow_xml : string
129+ str
92130 Flow represented as XML string.
93131 """
94132 flow_dict = self .__to_dict ()
@@ -99,6 +137,17 @@ def _to_xml(self):
99137 return flow_xml
100138
101139 def __to_dict (self ):
140+ """ Helper function used by _to_xml and, recursively, by itself.
141+
142+ Creates a dictionary representation of self which can be serialized
143+ to xml by the function _to_xml.
144+
145+ Returns
146+ -------
147+ OrderedDict
148+ Flow represented as OrderedDict.
149+
150+ """
102151 flow_dict = OrderedDict ()
103152 flow_dict ['oml:flow' ] = OrderedDict ()
104153 flow_dict ['oml:flow' ]['@xmlns:oml' ] = 'http://openml.org/openml'
@@ -122,17 +171,21 @@ def __to_dict(self):
122171 for key in self .parameters :
123172 param_dict = OrderedDict ()
124173 param_dict ['oml:name' ] = key
174+
125175 if self .parameters_meta_info [key ]['data_type' ] is not None :
126- param_dict ['oml:data_type' ] = self .parameters_meta_info [key ].get ('data_type' )
176+ param_dict ['oml:data_type' ] = self .parameters_meta_info [key ].\
177+ get ('data_type' )
178+
127179 param_dict ['oml:default_value' ] = self .parameters [key ]
128180 if self .parameters_meta_info [key ]['description' ] is not None :
129- param_dict ['oml:description' ] = self .parameters_meta_info [key ].get ('description' )
181+ param_dict ['oml:description' ] = self .parameters_meta_info [key ].\
182+ get ('description' )
130183
131- for key , value in param_dict .items ():
132- if key is not None and not isinstance (key , six .string_types ):
184+ for key_ , value in param_dict .items ():
185+ if key_ is not None and not isinstance (key_ , six .string_types ):
133186 raise ValueError ('Parameter name %s cannot be serialized '
134187 'because it is of type %s. Only strings '
135- 'can be serialized.' % (key , type (key )))
188+ 'can be serialized.' % (key_ , type (key_ )))
136189 if value is not None and not isinstance (value , six .string_types ):
137190 raise ValueError ('Parameter value %s cannot be serialized '
138191 'because it is of type %s. Only strings '
@@ -146,15 +199,16 @@ def __to_dict(self):
146199 for key in self .components :
147200 component_dict = OrderedDict ()
148201 component_dict ['oml:identifier' ] = key
149- component_dict ['oml:flow' ] = self .components [key ].__to_dict ()['oml:flow' ]
202+ component_dict ['oml:flow' ] = \
203+ self .components [key ].__to_dict ()['oml:flow' ]
150204
151- for key in component_dict :
152- # We can only check the key here, because the value is a flow.
153- # The flow itself has to be valid by recursion
154- if key is not None and not isinstance (key , six .string_types ):
205+ for key_ in component_dict :
206+ # We only need to check if the key is a string, because the
207+ # value is a flow. The flow itself is valid by recursion
208+ if key_ is not None and not isinstance (key_ , six .string_types ):
155209 raise ValueError ('Parameter name %s cannot be serialized '
156210 'because it is of type %s. Only strings '
157- 'can be serialized.' % (key , type (key )))
211+ 'can be serialized.' % (key_ , type (key_ )))
158212
159213 components .append (component_dict )
160214
@@ -173,6 +227,18 @@ def __to_dict(self):
173227
174228 @classmethod
175229 def _from_xml (cls , xml_dict ):
230+ """Create a flow from an xml description.
231+
232+ Parameters
233+ ----------
234+ xml_dict : dict
235+ Dictionary representation of the flow as created by __to_dict()
236+
237+ Returns
238+ -------
239+ OpenMLFlow
240+
241+ """
176242 dic = xml_dict ["oml:flow" ]
177243 flow_id = int (dic ['oml:id' ]) if 'oml:id' in dic else None
178244 uploader = dic .get ('oml:uploader' )
@@ -237,7 +303,11 @@ def _from_xml(cls, xml_dict):
237303 flow_id = flow_id )
238304
239305 def __eq__ (self , other ):
240- """Override the default Equals behavior"""
306+ """Check equality.
307+
308+ Two flows are equal if all their keys which are not set by the server
309+ are equal, as well as all their parameters and components.
310+ """
241311 if isinstance (other , self .__class__ ):
242312 this_dict = self .__dict__ .copy ()
243313 this_parameters = this_dict ['parameters' ]
@@ -253,7 +323,8 @@ def __eq__(self, other):
253323 del other_dict ['components' ]
254324 del other_dict ['model' ]
255325
256- # Name is actually not generated by the server, but it will be tested further down with a getter (allows mocking)
326+ # Name is actually not generated by the server, but it will be
327+ # tested further down with a getter (allows mocking in the tests)
257328 generated_by_the_server = ['name' , 'flow_id' , 'uploader' , 'version' ,
258329 'upload_date' , 'source_url' ,
259330 'binary_url' , 'source_format' ,
@@ -267,14 +338,18 @@ def __eq__(self, other):
267338 equal = this_dict == other_dict
268339 equal_name = self ._get_name () == other ._get_name ()
269340
270- parameters_equal = this_parameters .keys () == other_parameters .keys () and \
271- all ([this_parameter == other_parameter
272- for this_parameter , other_parameter in
273- zip (this_parameters .values (), other_parameters .values ())])
274- components_equal = this_components .keys () == other_components .keys () and \
275- all ([this_component == other_component
276- for this_component , other_component in
277- zip (this_components .values (), other_components .values ())])
341+ parameters_equal = \
342+ this_parameters .keys () == other_parameters .keys () and \
343+ all ([this_parameter == other_parameter
344+ for this_parameter , other_parameter in
345+ zip (this_parameters .values (),
346+ other_parameters .values ())])
347+ components_equal = \
348+ this_components .keys () == other_components .keys () and \
349+ all ([this_component == other_component
350+ for this_component , other_component in
351+ zip (this_components .values (),
352+ other_components .values ())])
278353
279354 return parameters_equal and components_equal and equal and equal_name
280355 return NotImplemented
@@ -333,6 +408,27 @@ def _get_name(self):
333408
334409
335410def create_flow_from_model (model , converter , description = None ):
411+ """Use a converter to create an OpenMLFlow from model.
412+
413+ Allows configuring how a model (for example a scikit-learn estimator) is
414+ transformed into an OpenMLFlow.
415+
416+ Parameters
417+ ----------
418+ model : object
419+ ML model. Must match the converter.
420+ converter : object
421+ Class that implements a method `flow = serialize_object(model)`.
422+ Abstract interface to come soon.
423+ description : str, optional
424+ Provide a description of the flow, overwriting the default description
425+ generated by the converter.
426+
427+ Returns
428+ -------
429+ OpenMLFlow
430+
431+ """
336432 flow = converter .serialize_object (model )
337433 if not isinstance (flow , OpenMLFlow ):
338434 raise ValueError ('Converter %s did return %s, not OpenMLFlow!' %
0 commit comments