22import xmltodict
33
44from .._api_calls import _perform_api_call
5- from .functions import _check_flow_exists
65
76
87class OpenMLFlow (object ):
@@ -14,8 +13,6 @@ class OpenMLFlow(object):
1413 The model the flow consists of. The model needs to have fit and predict methods.
1514 description : string
1615 Description of the flow (free text).
17- creator : string
18- FIXME
1916 contributor : string
2017 FIXME
2118 tag : string
@@ -28,64 +25,221 @@ class OpenMLFlow(object):
2825
2926 """
def __init__(self, name, description=None, model=None, components=None,
             parameters=None, parameters_meta_info=None,
             external_version=None, uploader=None, tags=None,
             binary_url=None, binary_format=None, binary_md5=None,
             version=None, upload_date=None, language=None,
             dependencies=None, flow_id=None):
    """Store the flow's metadata after validating the mapping arguments.

    ``components``, ``parameters`` and ``parameters_meta_info`` default to
    empty ``OrderedDict`` instances and ``tags`` defaults to an empty list
    when omitted. All remaining arguments are stored unchanged.

    Raises
    ------
    TypeError
        If ``components``, ``parameters`` or ``parameters_meta_info`` is
        given but is not an ``OrderedDict``.
    ValueError
        If ``parameters`` and ``parameters_meta_info`` do not have exactly
        the same set of keys.
    """
    self.name = name
    self.description = description
    self.model = model

    if components is None:
        components = OrderedDict()
    elif not isinstance(components, OrderedDict):
        raise TypeError('components must be of type OrderedDict, but is %s.' %
                        type(components))
    self.components = components

    if parameters is None:
        parameters = OrderedDict()
    elif not isinstance(parameters, OrderedDict):
        raise TypeError('parameters must be of type OrderedDict, but is %s.' %
                        type(parameters))
    if parameters_meta_info is None:
        parameters_meta_info = OrderedDict()
    elif not isinstance(parameters_meta_info, OrderedDict):
        raise TypeError('parameters_meta_info must be of type OrderedDict, '
                        'but is %s.' % type(parameters_meta_info))

    # Every parameter needs a meta-info entry and vice versa.
    parameter_keys = set(parameters)
    meta_info_keys = set(parameters_meta_info)
    only_in_parameters = parameter_keys - meta_info_keys
    if only_in_parameters:
        raise ValueError('Parameter %s only in parameters, but not in '
                         'parameters_meta_info.' % str(only_in_parameters))
    only_in_meta_info = meta_info_keys - parameter_keys
    if only_in_meta_info:
        raise ValueError('Parameter %s only in parameters_meta_info, but not '
                         'in parameters.' % str(only_in_meta_info))

    self.parameters = parameters
    self.parameters_meta_info = parameters_meta_info

    self.external_version = external_version
    # NOTE: the attribute name is misspelled ("upoader"), but the
    # serialisation code in this class reads it under that name, so it is
    # kept as-is here.
    self.upoader = uploader

    if tags is None:
        tags = []
    self.tags = tags
    self.binary_url = binary_url
    self.binary_format = binary_format
    self.binary_md5 = binary_md5
    self.version = version
    self.upload_date = upload_date
    self.language = language
    self.dependencies = dependencies
    self.flow_id = flow_id
5581
56- def _generate_flow_xml (self ):
def _to_xml(self):
    """Generate xml representation of self for upload to server.

    Returns
    -------
    flow_xml : string
        Flow represented as XML string, with the first line of the
        ``xmltodict`` output (the XML declaration) removed.
    """
    flow_dict = self.__to_dict()
    flow_xml = xmltodict.unparse(flow_dict, pretty=True)

    # A flow may not be uploaded with the encoding specification, so drop
    # everything up to and including the first newline. The separator must
    # be a bare newline: the declaration line is not followed by a space.
    return flow_xml.split('\n', 1)[-1]
96+
def __to_dict(self):
    """Build the nested ``OrderedDict`` describing this flow.

    The result has a single top-level key ``'oml:flow'``. Optional fields
    (id, uploader, version, upload date, language, dependencies and the
    binary_* fields) are only included when they are not ``None``.
    Sub-flows in ``self.components`` are serialised recursively.

    Returns
    -------
    flow_dict : OrderedDict
        Mapping suitable for ``xmltodict.unparse``.
    """
    flow = OrderedDict()
    flow['@xmlns:oml'] = 'http://openml.org/openml'
    if self.flow_id is not None:
        flow['oml:id'] = self.flow_id
    if self.upoader is not None:
        flow['oml:uploader'] = self.upoader
    flow['oml:name'] = self._get_name()
    if self.version is not None:
        flow['oml:version'] = self.version
    flow['oml:external_version'] = self.external_version
    flow['oml:description'] = self.description
    if self.upload_date is not None:
        flow['oml:upload_date'] = self.upload_date
    if self.language is not None:
        flow['oml:language'] = self.language
    if self.dependencies is not None:
        flow['oml:dependencies'] = self.dependencies

    flow_parameters = []
    for key, default_value in self.parameters.items():
        meta_info = self.parameters_meta_info[key]
        data_type = meta_info.get('data_type')
        description = meta_info.get('description')

        param_dict = OrderedDict()
        param_dict['oml:name'] = key
        if data_type is not None:
            param_dict['oml:data_type'] = data_type
        # Store the plain value; a trailing comma here would wrap it in a
        # 1-tuple and corrupt the serialised default.
        param_dict['oml:default_value'] = default_value
        if description is not None:
            param_dict['oml:description'] = description
        flow_parameters.append(param_dict)
    flow['oml:parameter'] = flow_parameters

    components = []
    for identifier, sub_flow in self.components.items():
        component_dict = OrderedDict()
        component_dict['oml:identifier'] = identifier
        # Unwrap the sub-flow's own top-level 'oml:flow' key before nesting.
        component_dict['oml:flow'] = sub_flow.__to_dict()['oml:flow']
        components.append(component_dict)
    flow['oml:component'] = components

    flow['oml:tag'] = self.tags

    if self.binary_url is not None:
        flow['oml:binary_url'] = self.binary_url
    if self.binary_format is not None:
        flow['oml:binary_format'] = self.binary_format
    if self.binary_md5 is not None:
        flow['oml:binary_md5'] = self.binary_md5

    flow_dict = OrderedDict()
    flow_dict['oml:flow'] = flow
    return flow_dict
148+
@classmethod
def _from_xml(cls, xml_dict):
    """Create a flow from a parsed flow description.

    Parameters
    ----------
    xml_dict : dict
        Mapping with a top-level ``'oml:flow'`` entry, as produced by
        ``xmltodict.parse`` on a flow XML document.

    Returns
    -------
    flow : instance of ``cls``
        Flow with ``model=None``; sub-flows found under ``'oml:component'``
        are converted recursively and stored by their identifier.
    """
    def as_list(value):
        # A child element that occurs once is a bare item, repeated
        # children are a list, and an empty element is None. Normalise all
        # three cases to a list.
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    dic = xml_dict['oml:flow']
    flow_id = int(dic['oml:id']) if 'oml:id' in dic else None

    parameters = OrderedDict()
    parameters_meta_info = OrderedDict()
    for oml_parameter in as_list(dic.get('oml:parameter')):
        parameter_name = oml_parameter['oml:name']
        # Read leniently so a parameter without a default becomes None
        # instead of raising KeyError.
        parameters[parameter_name] = oml_parameter.get('oml:default_value')
        parameters_meta_info[parameter_name] = {
            'description': oml_parameter.get('oml:description'),
            'data_type': oml_parameter.get('oml:data_type'),
        }

    components = OrderedDict()
    for component in as_list(dic.get('oml:component')):
        # Each component carries its own 'oml:flow' entry, so it can be
        # passed to this constructor unchanged. Using cls keeps subclasses
        # intact.
        components[component['oml:identifier']] = cls._from_xml(component)

    # A single tag is a plain string; it must be wrapped rather than
    # iterated, or it would be split into characters.
    tags = list(as_list(dic.get('oml:tag')))

    return cls(name=dic['oml:name'],
               description=dic.get('oml:description'),
               model=None,
               components=components,
               parameters=parameters,
               parameters_meta_info=parameters_meta_info,
               external_version=dic.get('oml:external_version'),
               uploader=dic.get('oml:uploader'),
               tags=tags,
               version=dic.get('oml:version'),
               upload_date=dic.get('oml:upload_date'),
               language=dic.get('oml:language'),
               dependencies=dic.get('oml:dependencies'),
               binary_url=dic.get('oml:binary_url'),
               binary_format=dic.get('oml:binary_format'),
               binary_md5=dic.get('oml:binary_md5'),
               flow_id=flow_id)
213+
def __eq__(self, other):
    """Compare two flows attribute by attribute, ignoring ``model``.

    ``parameters`` and ``components`` must have the same keys and equal
    values per key, so sub-flows are compared recursively through ``==``.
    All other instance attributes except ``model`` must be equal too.

    Returns
    -------
    bool or NotImplemented
        ``NotImplemented`` when ``other`` is not an instance of this
        object's class.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented

    def same_mapping(left, right):
        # Compare the values, not just the keys: iterating a mapping (or
        # zipping two mappings) only yields keys.
        return (left.keys() == right.keys() and
                all(left[key] == right[key] for key in left))

    handled_separately = ('parameters', 'components', 'model')
    this_rest = {key: value for key, value in self.__dict__.items()
                 if key not in handled_separately}
    other_rest = {key: value for key, value in other.__dict__.items()
                  if key not in handled_separately}

    return (same_mapping(self.__dict__['parameters'],
                         other.__dict__['parameters']) and
            same_mapping(self.__dict__['components'],
                         other.__dict__['components']) and
            this_rest == other_rest)
89243
90244 def publish (self ):
91245 """Publish flow to OpenML server.
@@ -95,7 +249,7 @@ def publish(self):
95249 self : OpenMLFlow
96250
97251 """
98- xml_description = self ._generate_flow_xml ()
252+ xml_description = self ._to_xml ()
99253
100254 file_elements = {'description' : xml_description }
101255 return_code , return_value = _perform_api_call (
@@ -142,3 +296,38 @@ def create_flow_from_model(model, converter, description=None):
142296 flow .description = description
143297
144298 return flow
299+
300+
def _check_flow_exists(name, version):
    """Ask the server for the id of the flow identified by name+version.

    Parameters
    ----------
    name : string
        Name of the flow
    version : string
        Version information associated with flow.

    Returns
    -------
    return_code : int
        Status code of the API call. Always 200 here, because any other
        code raises.
    xml_response : string
        Raw response of the API call.
    flow_id : string
        Content of ``oml:flow_exists/oml:id`` as parsed by ``xmltodict``.
        It is not converted to ``int``. Presumably ``-1`` when no such flow
        exists (see the API documentation linked below; TODO confirm).

    Raises
    ------
    ValueError
        If ``name`` or ``version`` is not a non-empty string, or if the API
        call does not return code 200.

    Notes
    -----
    see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
    """
    if not (isinstance(name, str) and name):
        raise ValueError("Argument 'name' should be a non-empty string")
    if not (isinstance(version, str) and version):
        raise ValueError("Argument 'version' should be a non-empty string")

    return_code, xml_response = _perform_api_call(
        "flow/exists/%s/%s" % (name, version))
    # TODO check with latest version of code if this raises an exception
    if return_code != 200:
        # fixme raise appropriate error
        raise ValueError("api call failed: %s" % xml_response)
    xml_dict = xmltodict.parse(xml_response)
    flow_id = xml_dict['oml:flow_exists']['oml:id']
    return return_code, xml_response, flow_id
0 commit comments