Skip to content

Commit 5f78c73

Browse files
committed
ADD get_flow, serialization, deserialization of flow
1 parent 52d0140 commit 5f78c73

7 files changed

Lines changed: 335 additions & 59 deletions

File tree

openml/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,13 @@
1919
from .datasets import OpenMLDataset
2020
from . import datasets
2121
from . import runs
22+
from . import flows
2223
from .runs import OpenMLRun
2324
from .tasks import OpenMLTask, OpenMLSplit
2425
from .flows import OpenMLFlow
2526

2627

2728
__version__ = "0.2.1"
2829

29-
__all__ = ['OpenMLDataset', 'OpenMLRun', 'OpenMLSplit',
30-
'datasets', 'OpenMLTask', 'OpenMLFlow', 'config', 'runs']
30+
__all__ = ['OpenMLDataset', 'OpenMLRun', 'OpenMLSplit', 'datasets',
31+
'OpenMLTask', 'OpenMLFlow', 'config', 'runs', 'flows']

openml/flows/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .flow import OpenMLFlow, create_flow_from_model
2+
from .functions import get_flow
23

3-
__all__ = ['OpenMLFlow', 'create_flow_from_model']
4+
__all__ = ['OpenMLFlow', 'create_flow_from_model', 'get_flow']

openml/flows/flow.py

Lines changed: 211 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import xmltodict
33

44
from .._api_calls import _perform_api_call
5-
from .functions import _check_flow_exists
65

76

87
class OpenMLFlow(object):
@@ -14,8 +13,6 @@ class OpenMLFlow(object):
1413
The model the flow consists of. The model needs to have fit and predict methods.
1514
description : string
1615
Description of the flow (free text).
17-
creator : string
18-
FIXME
1916
contributor : string
2017
FIXME
2118
tag : string
@@ -28,64 +25,221 @@ class OpenMLFlow(object):
2825
2926
"""
3027
def __init__(self, name, description=None, model=None, components=None,
31-
parameters=None, external_version=None, creator=None,
32-
uploader=None, tag=None, flow_id=None):
28+
parameters=None, parameters_meta_info=None,
29+
external_version=None, uploader=None, tags=None,
30+
binary_url=None, binary_format=None, binary_md5=None,
31+
version=None, upload_date=None, language=None,
32+
dependencies=None, flow_id=None):
3333
self.name = name
3434
self.description = description
3535
self.model = model
3636

3737
if components is None:
3838
components = OrderedDict()
3939
elif not isinstance(components, OrderedDict):
40-
raise TypeError('Components must be of type OrderedDict, but are %s.' %
40+
raise TypeError('components must be of type OrderedDict, but is %s.' %
4141
type(components))
4242
self.components = components
4343
if parameters is None:
4444
parameters = OrderedDict()
4545
elif not isinstance(parameters, OrderedDict):
46-
raise TypeError('Parameters must be of type OrderedDict, but are %s.' %
46+
raise TypeError('parameters must be of type OrderedDict, but is %s.' %
4747
type(parameters))
48+
if parameters_meta_info is None:
49+
parameters_meta_info = OrderedDict()
50+
elif not isinstance(parameters_meta_info, OrderedDict):
51+
raise TypeError('parameters_meta_info must be of type OrderedDict, but is %s.' %
52+
type(parameters_meta_info))
53+
keys_parameters = set(parameters.keys())
54+
keys_parameters_meta_info = set(parameters_meta_info.keys())
55+
if len(keys_parameters.difference(keys_parameters_meta_info)) > 0:
56+
raise ValueError('Parameter %s only in parameters, but not in'
57+
'parameters_meta_info.' %
58+
str(keys_parameters.difference(keys_parameters_meta_info)))
59+
if len(keys_parameters_meta_info.difference(keys_parameters)) > 0:
60+
raise ValueError('Parameter %s only in parameters_meta_info, but not in'
61+
'parameters.' %
62+
str(keys_parameters_meta_info.difference(keys_parameters)))
63+
4864
self.parameters = parameters
65+
self.parameters_meta_info = parameters_meta_info
4966

5067
self.external_version = external_version
51-
self.creator = creator
5268
self.upoader = uploader
53-
self.tag = tag
69+
70+
if tags is None:
71+
tags = []
72+
self.tags = tags
73+
self.binary_url = binary_url
74+
self.binary_format = binary_format
75+
self.binary_md5 = binary_md5
76+
self.version = version
77+
self.upload_date = upload_date
78+
self.language = language
79+
self.dependencies = dependencies
5480
self.flow_id = flow_id
5581

56-
def _generate_flow_xml(self):
82+
def _to_xml(self):
    """Serialize this flow to the XML string used for upload to the server.

    Returns
    -------
    flow_xml : string
        Flow represented as XML string.
    """
    serialized = xmltodict.unparse(self.__to_dict(), pretty=True)

    # A flow may not be uploaded with the encoding specification, so
    # everything up to and including the first newline is dropped.
    return serialized.split('\n', 1)[-1]
96+
97+
def __to_dict(self):
6698
flow_dict = OrderedDict()
6799
flow_dict['oml:flow'] = OrderedDict()
68100
flow_dict['oml:flow']['@xmlns:oml'] = 'http://openml.org/openml'
101+
if self.flow_id is not None:
102+
flow_dict['oml:flow']['oml:id'] = self.flow_id
103+
if self.upoader is not None:
104+
flow_dict['oml:flow']['oml:uploader'] = self.upoader
69105
flow_dict['oml:flow']['oml:name'] = self._get_name()
106+
if self.version is not None:
107+
flow_dict['oml:flow']['oml:version'] = self.version
70108
flow_dict['oml:flow']['oml:external_version'] = self.external_version
71109
flow_dict['oml:flow']['oml:description'] = self.description
110+
if self.upload_date is not None:
111+
flow_dict['oml:flow']['oml:upload_date'] = self.upload_date
112+
if self.language is not None:
113+
flow_dict['oml:flow']['oml:language'] = self.language
114+
if self.dependencies is not None:
115+
flow_dict['oml:flow']['oml:dependencies'] = self.dependencies
72116

73-
clf_params = model.get_params()
74117
flow_parameters = []
75-
for k, v in clf_params.items():
76-
# data_type, default_value, description, recommendedRange
77-
# type = v.__class__.__name__ Not using this because it doesn't conform standards
78-
# eg. int instead of integer
79-
param_dict = {'oml:name': k}
118+
for key in self.parameters:
119+
param_dict = OrderedDict()
120+
param_dict['oml:name'] = key
121+
if self.parameters_meta_info[key]['data_type'] is not None:
122+
param_dict['oml:data_type'] = self.parameters_meta_info[key].get('data_type')
123+
param_dict['oml:default_value'] = self.parameters[key],
124+
if self.parameters_meta_info[key]['description'] is not None:
125+
param_dict['oml:description'] = self.parameters_meta_info[key].get('description')
80126
flow_parameters.append(param_dict)
81127

82128
flow_dict['oml:flow']['oml:parameter'] = flow_parameters
83129

84-
flow_xml = xmltodict.unparse(flow_dict, pretty=True)
130+
components = []
131+
for key in self.components:
132+
component_dict = OrderedDict()
133+
component_dict['oml:identifier'] = key
134+
component_dict['oml:flow'] = self.components[key].__to_dict()['oml:flow']
135+
components.append(component_dict)
136+
flow_dict['oml:flow']['oml:component'] = components
85137

86-
# A flow may not be uploaded with the encoding specification..
87-
flow_xml = flow_xml.split('\n', 1)[-1]
88-
return flow_xml
138+
flow_dict['oml:flow']['oml:tag'] = self.tags
139+
140+
if self.binary_url is not None:
141+
flow_dict['oml:flow']['oml:binary_url'] = self.binary_url
142+
if self.binary_format is not None:
143+
flow_dict['oml:flow']['oml:binary_format'] = self.binary_format
144+
if self.binary_md5 is not None:
145+
flow_dict['oml:flow']['oml:binary_md5'] = self.binary_md5
146+
147+
return flow_dict
148+
149+
@classmethod
150+
def _from_xml(cls, xml_dict):
151+
dic = xml_dict["oml:flow"]
152+
flow_id = int(dic['oml:id']) if 'oml:id' in dic else None
153+
uploader = dic.get('oml:uploader')
154+
name = dic['oml:name']
155+
external_version = dic.get('oml:external_version')
156+
description = dic.get('oml:description')
157+
upload_date = dic.get('oml:upload_date')
158+
language = dic.get('oml:language')
159+
dependencies = dic.get('oml:dependencies')
160+
version = dic.get('oml:version')
161+
binary_url = dic.get('oml:binary_url')
162+
binary_format = dic.get('oml:binary_format')
163+
binary_md5 = dic.get('oml:binary_md5')
164+
165+
parameters = OrderedDict()
166+
parameters_meta_info = OrderedDict()
167+
if 'oml:parameter' in dic:
168+
if isinstance(dic['oml:parameter'], dict):
169+
oml_parameters = [dic['oml:parameter']]
170+
else:
171+
oml_parameters = dic['oml:parameter']
172+
173+
for oml_parameter in oml_parameters:
174+
parameter_name = oml_parameter['oml:name']
175+
default_value = oml_parameter['oml:default_value']
176+
parameters[parameter_name] = default_value
177+
178+
meta_info = dict()
179+
meta_info['description'] = oml_parameter.get('oml:description')
180+
meta_info['data_type'] = oml_parameter.get('oml:data_type')
181+
parameters_meta_info[parameter_name] = meta_info
182+
183+
components = OrderedDict()
184+
if 'oml:component' in dic:
185+
if isinstance(dic['oml:component'], dict):
186+
oml_components = [dic['oml:component']]
187+
else:
188+
oml_components = dic['oml:component']
189+
190+
for component in oml_components:
191+
flow = OpenMLFlow._from_xml(component)
192+
components[component['oml:identifier']] = flow
193+
194+
tags = []
195+
if 'oml:tag' in dic and dic['oml:tag'] is not None:
196+
if isinstance(dic['oml:tag'], dict):
197+
oml_tags = [dic['oml:tag']]
198+
else:
199+
oml_tags = dic['oml:tag']
200+
201+
for tag in oml_tags:
202+
tags.append(tag)
203+
204+
return cls(name=name, description=description, model=None,
205+
components=components, parameters=parameters,
206+
parameters_meta_info=parameters_meta_info,
207+
external_version=external_version,
208+
uploader=uploader, tags=tags, version=version,
209+
upload_date=upload_date, language=language,
210+
dependencies=dependencies, binary_url=binary_url,
211+
binary_format=binary_format, binary_md5=binary_md5,
212+
flow_id=flow_id)
213+
214+
def __eq__(self, other):
215+
"""Override the default Equals behavior"""
216+
if isinstance(other, self.__class__):
217+
this_dict = self.__dict__.copy()
218+
this_parameters = this_dict['parameters']
219+
del this_dict['parameters']
220+
this_components = this_dict['components']
221+
del this_dict['components']
222+
del this_dict['model']
223+
224+
other_dict = other.__dict__.copy()
225+
other_parameters = other_dict['parameters']
226+
del other_dict['parameters']
227+
other_components = other_dict['components']
228+
del other_dict['components']
229+
del other_dict['model']
230+
231+
parameters_equal = this_parameters.keys() == other_parameters.keys() and \
232+
all([this_parameter == other_parameter
233+
for this_parameter, other_parameter in
234+
zip(this_parameters, other_parameters)])
235+
components_equal = this_components.keys() == other_components.keys() and \
236+
all([this_component == other_component
237+
for this_component, other_component in
238+
zip(this_components, other_components)])
239+
equal = this_dict == other_dict
240+
241+
return parameters_equal and components_equal and equal
242+
return NotImplemented
89243

90244
def publish(self):
91245
"""Publish flow to OpenML server.
@@ -95,7 +249,7 @@ def publish(self):
95249
self : OpenMLFlow
96250
97251
"""
98-
xml_description = self._generate_flow_xml()
252+
xml_description = self._to_xml()
99253

100254
file_elements = {'description': xml_description}
101255
return_code, return_value = _perform_api_call(
@@ -142,3 +296,38 @@ def create_flow_from_model(model, converter, description=None):
142296
flow.description = description
143297

144298
return flow
299+
300+
301+
def _check_flow_exists(name, version):
302+
"""Retrieves the flow id of the flow uniquely identified by name+version.
303+
304+
Parameter
305+
---------
306+
name : string
307+
Name of the flow
308+
version : string
309+
Version information associated with flow.
310+
311+
Returns
312+
-------
313+
flow_exist : int
314+
Flow id or -1 if the flow doesn't exist.
315+
316+
Notes
317+
-----
318+
see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
319+
"""
320+
if not (type(name) is str and len(name) > 0):
321+
raise ValueError('Argument \'name\' should be a non-empty string')
322+
if not (type(version) is str and len(version) > 0):
323+
raise ValueError('Argument \'version\' should be a non-empty string')
324+
325+
return_code, xml_response = _perform_api_call(
326+
"flow/exists/%s/%s" % (name, version))
327+
# TODO check with latest version of code if this raises an exception
328+
if return_code != 200:
329+
# fixme raise appropriate error
330+
raise ValueError("api call failed: %s" % xml_response)
331+
xml_dict = xmltodict.parse(xml_response)
332+
flow_id = xml_dict['oml:flow_exists']['oml:id']
333+
return return_code, xml_response, flow_id

openml/flows/functions.py

Lines changed: 23 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,31 @@
11
import xmltodict
22

33
from openml._api_calls import _perform_api_call
4+
from . import OpenMLFlow
5+
from ..util import URLError
46

57

6-
def _check_flow_exists(name, version):
7-
"""Retrieves the flow id of the flow uniquely identified by name+version.
8+
def get_flow(flow_id):
    """Download the OpenML flow for a given flow ID.

    Parameters
    ----------
    flow_id : int
        The OpenML flow id.

    Returns
    -------
    flow : OpenMLFlow
        Flow built from the server's XML description.

    Raises
    ------
    ValueError
        If ``flow_id`` cannot be converted to an integer.
    """
    try:
        flow_id = int(flow_id)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are translated; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        raise ValueError("Flow ID is neither an Integer nor can be "
                         "cast to an Integer.")

    # Errors raised by the API call propagate to the caller unchanged.
    _, flow_xml = _perform_api_call("flow/%d" % flow_id)

    flow_dict = xmltodict.parse(flow_xml)
    return OpenMLFlow._from_xml(flow_dict)

0 commit comments

Comments
 (0)