Skip to content

Commit 2513667

Browse files
ArlindKadra and mfeurer
authored and committed
Paging (#426)
* Created first basic template, removed redundant variable * Improving list_datasets * First implementation of the list_* with the limit tag active * First implementation of the feature, fixed bugs and refactored the code * Changing batch_size to be a keyword argument * Fixed not considering initial offset, removing size and the double offset key from the filter dict * Changing task_type_id argument name in accordance with the new implementation * Reverting previous solution for task_type_id, implementing another fix * Fix for python2 and changing the unit test which times out * Added another test method and did a slight change in an existing test method * Changing the assert value for the failing test method * Added the implementation to filter by uploader for flows, filter by task_type for runs, filter by multiple operators for tasks and also refactored the code according to PEP8 * Refactored code as requested
1 parent 3e99d99 commit 2513667

9 files changed

Lines changed: 306 additions & 138 deletions

File tree

openml/datasets/functions.py

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from oslo_concurrency import lockutils
99
import xmltodict
1010

11+
import openml.utils
1112
from .dataset import OpenMLDataset
1213
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
1314
from .. import config
@@ -137,8 +138,10 @@ def _get_cached_dataset_arff(dataset_id):
137138
"cached" % dataset_id)
138139

139140

140-
def list_datasets(offset=None, size=None, status=None, **kwargs):
141-
"""Return a list of all dataset which are on OpenML.
141+
def list_datasets(offset=None, size=None, status=None, tag=None, **kwargs):
142+
143+
"""
144+
Return a list of all dataset which are on OpenML. (Supports large amount of results)
142145
143146
Parameters
144147
----------
@@ -150,9 +153,11 @@ def list_datasets(offset=None, size=None, status=None, **kwargs):
150153
Should be {active, in_preparation, deactivated}. By
151154
default active datasets are returned, but also datasets
152155
from another status can be requested.
156+
tag : str, optional
153157
kwargs : dict, optional
154158
Legal filter operators (keys in the dict):
155-
{tag, status, limit, offset, data_name, data_version, number_instances, number_features, number_classes, number_missing_values}.
159+
data_name, data_version, number_instances,
160+
number_features, number_classes, number_missing_values.
156161
157162
Returns
158163
-------
@@ -169,29 +174,38 @@ def list_datasets(offset=None, size=None, status=None, **kwargs):
169174
If qualities are calculated for the dataset, some of
170175
these are also returned.
171176
"""
172-
api_call = "data/list"
173-
if offset is not None:
174-
api_call += "/offset/%d" % int(offset)
175177

176-
if size is not None:
177-
api_call += "/limit/%d" % int(size)
178+
return openml.utils.list_all(_list_datasets, offset=offset, size=size, status=status, tag=tag, **kwargs)
178179

179-
if status is not None:
180-
api_call += "/status/%s" %status
180+
181+
def _list_datasets(**kwargs):
182+
183+
"""
184+
Perform api call to return a list of all datasets.
185+
186+
Parameters
187+
----------
188+
kwargs : dict, optional
189+
Legal filter operators (keys in the dict):
190+
{tag, status, limit, offset, data_name, data_version, number_instances,
191+
number_features, number_classes, number_missing_values.
192+
193+
Returns
194+
-------
195+
datasets : dict of dicts
196+
"""
197+
198+
api_call = "data/list"
181199

182200
if kwargs is not None:
183-
for filter, value in kwargs.items():
184-
api_call += "/%s/%s" % (filter, value)
201+
for operator, value in kwargs.items():
202+
api_call += "/%s/%s" % (operator, value)
203+
return __list_datasets(api_call)
185204

186-
return _list_datasets(api_call)
187205

206+
def __list_datasets(api_call):
188207

189-
def _list_datasets(api_call):
190-
# TODO add proper error handling here!
191-
try:
192-
xml_string = _perform_api_call(api_call)
193-
except OpenMLServerNoResult:
194-
return dict()
208+
xml_string = _perform_api_call(api_call)
195209
datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))
196210

197211
# Minimalistic check if the XML is useful
@@ -224,7 +238,7 @@ def check_datasets_active(dataset_ids):
224238
225239
Parameters
226240
----------
227-
dataset_id : iterable
241+
dataset_ids : iterable
228242
Integers representing dataset ids.
229243
230244
Returns
@@ -279,7 +293,7 @@ def get_dataset(dataset_id):
279293
280294
Parameters
281295
----------
282-
ddataset_id : int
296+
dataset_id : int
283297
Dataset ID of the dataset to download
284298
285299
Returns

openml/evaluations/functions.py

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
import xmltodict
22

33
from openml.exceptions import OpenMLServerNoResult
4+
import openml.utils
45
from .._api_calls import _perform_api_call
56
from ..evaluations import OpenMLEvaluation
67

78

89
def list_evaluations(function, offset=None, size=None, id=None, task=None,
910
setup=None, flow=None, uploader=None, tag=None):
10-
"""List all run-evaluation pairs matching all of the given filters.
11+
"""
12+
List all run-evaluation pairs matching all of the given filters.
13+
(Supports large amount of results)
1114
12-
Perform API call ``/evaluation/function{function}/{filters}``
13-
1415
Parameters
1516
----------
16-
function : str
17+
function : str
1718
the evaluation function. e.g., predictive_accuracy
1819
offset : int, optional
1920
the number of runs to skip, starting from the first
@@ -37,11 +38,45 @@ def list_evaluations(function, offset=None, size=None, id=None, task=None,
3738
dict
3839
"""
3940

40-
api_call = "evaluation/list/function/%s" %function
41-
if offset is not None:
42-
api_call += "/offset/%d" % int(offset)
43-
if size is not None:
44-
api_call += "/limit/%d" % int(size)
41+
return openml.utils.list_all(_list_evaluations, function, offset=offset, size=size,
42+
id=id, task=task, setup=setup, flow=flow, uploader=uploader, tag=tag)
43+
44+
45+
def _list_evaluations(function, id=None, task=None,
46+
setup=None, flow=None, uploader=None, **kwargs):
47+
"""
48+
Perform API call ``/evaluation/function{function}/{filters}``
49+
50+
Parameters
51+
----------
52+
The arguments that are lists are separated from the single value
53+
ones which are put into the kwargs.
54+
55+
function : str
56+
the evaluation function. e.g., predictive_accuracy
57+
58+
id : list, optional
59+
60+
task : list, optional
61+
62+
setup: list, optional
63+
64+
flow : list, optional
65+
66+
uploader : list, optional
67+
68+
kwargs: dict, optional
69+
Legal filter operators: tag, limit, offset.
70+
71+
Returns
72+
-------
73+
dict
74+
"""
75+
76+
api_call = "evaluation/list/function/%s" % function
77+
if kwargs is not None:
78+
for operator, value in kwargs.items():
79+
api_call += "/%s/%s" % (operator, value)
4580
if id is not None:
4681
api_call += "/run/%s" % ','.join([str(int(i)) for i in id])
4782
if task is not None:
@@ -52,19 +87,13 @@ def list_evaluations(function, offset=None, size=None, id=None, task=None,
5287
api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow])
5388
if uploader is not None:
5489
api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
55-
if tag is not None:
56-
api_call += "/tag/%s" % tag
5790

58-
return _list_evaluations(api_call)
91+
return __list_evaluations(api_call)
5992

6093

61-
def _list_evaluations(api_call):
94+
def __list_evaluations(api_call):
6295
"""Helper function to parse API calls which are lists of runs"""
63-
try:
64-
xml_string = _perform_api_call(api_call)
65-
except OpenMLServerNoResult:
66-
return dict()
67-
96+
xml_string = _perform_api_call(api_call)
6897
evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',))
6998
# Minimalistic check if the XML is useful
7099
if 'oml:evaluations' not in evals_dict:
@@ -88,5 +117,4 @@ def _list_evaluations(api_call):
88117
eval_['oml:upload_time'], float(eval_['oml:value']),
89118
array_data)
90119
evals[run_id] = evaluation
91-
return evals
92-
120+
return evals

openml/flows/functions.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from openml._api_calls import _perform_api_call
77
from openml.exceptions import OpenMLServerNoResult
88
from . import OpenMLFlow
9+
import openml.utils
910

1011

1112
def get_flow(flow_id):
@@ -30,8 +31,11 @@ def get_flow(flow_id):
3031
return flow
3132

3233

33-
def list_flows(offset=None, size=None, tag=None):
34-
"""Return a list of all flows which are on OpenML.
34+
def list_flows(offset=None, size=None, tag=None, **kwargs):
35+
36+
"""
37+
Return a list of all flows which are on OpenML.
38+
(Supports large amount of results)
3539
3640
Parameters
3741
----------
@@ -41,6 +45,8 @@ def list_flows(offset=None, size=None, tag=None):
4145
the maximum number of flows to return
4246
tag : str, optional
4347
the tag to include
48+
kwargs: dict, optional
49+
Legal filter operators: uploader.
4450
4551
Returns
4652
-------
@@ -57,17 +63,29 @@ def list_flows(offset=None, size=None, tag=None):
5763
- external version
5864
- uploader
5965
"""
60-
api_call = "flow/list"
61-
if offset is not None:
62-
api_call += "/offset/%d" % int(offset)
66+
return openml.utils.list_all(_list_flows, offset=offset, size=size, tag=tag, **kwargs)
67+
68+
69+
def _list_flows(**kwargs):
70+
"""
71+
Perform the api call that return a list of all flows.
72+
73+
Parameters
74+
----------
75+
kwargs: dict, optional
76+
Legal filter operators: uploader, tag, limit, offset.
6377
64-
if size is not None:
65-
api_call += "/limit/%d" % int(size)
78+
Returns
79+
-------
80+
flows : dict
81+
"""
82+
api_call = "flow/list"
6683

67-
if tag is not None:
68-
api_call += "/tag/%s" % tag
84+
if kwargs is not None:
85+
for operator, value in kwargs.items():
86+
api_call += "/%s/%s" % (operator, value)
6987

70-
return _list_flows(api_call)
88+
return __list_flows(api_call)
7189

7290

7391
def flow_exists(name, external_version):
@@ -79,7 +97,7 @@ def flow_exists(name, external_version):
7997
----------
8098
name : string
8199
Name of the flow
82-
version : string
100+
external_version : string
83101
Version information associated with flow.
84102
85103
Returns
@@ -108,11 +126,9 @@ def flow_exists(name, external_version):
108126
return False
109127

110128

111-
def _list_flows(api_call):
112-
try:
113-
xml_string = _perform_api_call(api_call)
114-
except OpenMLServerNoResult:
115-
return dict()
129+
def __list_flows(api_call):
130+
131+
xml_string = _perform_api_call(api_call)
116132
flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))
117133

118134
# Minimalistic check if the XML is useful
@@ -186,11 +202,11 @@ def assert_flows_equal(flow1, flow2,
186202
# Tags aren't directly created by the server,
187203
# but the uploader has no control over them!
188204
'tags']
189-
ignored_by_python_API = ['binary_url', 'binary_format', 'binary_md5',
205+
ignored_by_python_api = ['binary_url', 'binary_format', 'binary_md5',
190206
'model']
191207

192208
for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()):
193-
if key in generated_by_the_server + ignored_by_python_API:
209+
if key in generated_by_the_server + ignored_by_python_api:
194210
continue
195211
attr1 = getattr(flow1, key, None)
196212
attr2 = getattr(flow2, key, None)

0 commit comments

Comments
 (0)