Skip to content

Commit 4c87829

Browse files
authored
Merge pull request #171 from openml/runpagination
added run pagination
2 parents a407b75 + 7dba90f commit 4c87829

4 files changed

Lines changed: 88 additions & 208 deletions

File tree

openml/runs/__init__.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,4 @@
11
from .run import OpenMLRun
2-
from .functions import (run_task, get_run, list_runs, list_runs_by_flow,
3-
list_runs_by_tag, list_runs_by_task,
4-
list_runs_by_uploader, list_runs_by_filters)
2+
from .functions import (run_task, get_run, list_runs)
53

6-
__all__ = ['OpenMLRun', 'run_task', 'get_run', 'list_runs', 'list_runs_by_flow',
7-
'list_runs_by_tag', 'list_runs_by_task', 'list_runs_by_uploader',
8-
'list_runs_by_filters']
4+
__all__ = ['OpenMLRun', 'run_task', 'get_run', 'list_runs']

openml/runs/functions.py

Lines changed: 28 additions & 169 deletions
Original file line number | Diff line number | Diff line change
@@ -260,195 +260,54 @@ def _get_cached_run(run_id):
260260
"cached" % run_id)
261261

262262

263-
def list_runs_by_filters(id=None, task=None, flow=None,
264-
uploader=None):
263+
def list_runs(offset=None, size=None, id=None, task=None,
264+
flow=None, uploader=None, tag=None):
265265
"""List all runs matching all of the given filters.
266266
267267
Perform API call `/run/list/{filters} <http://www.openml.org/api_docs/#!/run/get_run_list_filters>`_
268268
269269
Parameters
270270
----------
271-
id : int or list
271+
offset : int, optional
272+
the number of runs to skip, starting from the first
273+
size : int, optional
274+
the maximum number of runs to show
272275
273-
task : int or list
276+
id : list, optional
274277
275-
flow : int or list
278+
task : list, optional
276279
277-
uploader : int or list
280+
flow : list, optional
281+
282+
uploader : list, optional
283+
284+
tag : str, optional
278285
279286
Returns
280287
-------
281288
list
282289
List of found runs.
283290
"""
284291

285-
value = []
286-
by = []
287-
292+
api_call = "run/list"
293+
if offset is not None:
294+
api_call += "/offset/%d" % int(offset)
295+
if size is not None:
296+
api_call += "/limit/%d" % int(size)
288297
if id is not None:
289-
value.append(id)
290-
by.append('run')
298+
api_call += "/run/%s" % ','.join([str(int(i)) for i in id])
291299
if task is not None:
292-
value.append(task)
293-
by.append('task')
300+
api_call += "/task/%s" % ','.join([str(int(i)) for i in task])
294301
if flow is not None:
295-
value.append(flow)
296-
by.append('flow')
302+
api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow])
297303
if uploader is not None:
298-
value.append(uploader)
299-
by.append('uploader')
300-
301-
if len(value) == 0:
302-
raise ValueError('At least one argument out of task, flow, uploader '
303-
'must have a different value than None')
304-
305-
api_call = "run/list"
306-
for id_, by_ in zip(value, by):
307-
if isinstance(id_, list):
308-
for i in range(len(id_)):
309-
# Type checking to avoid bad calls to the server
310-
id_[i] = str(int(id_[i]))
311-
id_ = ','.join(id_)
312-
else:
313-
# Only type checking here
314-
id_ = int(id_)
315-
316-
if by_ is None:
317-
raise ValueError("Argument 'by' must not contain None!")
318-
api_call = "%s/%s/%s" % (api_call, by_, id_)
304+
api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
305+
if tag is not None:
306+
api_call += "/tag/%s" % tag
319307

320308
return _list_runs(api_call)
321309

322310

323-
def list_runs_by_tag(tag):
324-
"""List runs by tag.
325-
326-
Perform API call `/run/list/tag/{tag} <http://www.openml.org/api_docs/#!/run/get_run_list_tag_tag>`_
327-
328-
Parameters
329-
----------
330-
tag : str
331-
332-
Returns
333-
-------
334-
list
335-
List of found runs.
336-
"""
337-
return _list_runs_by(tag, 'tag')
338-
339-
340-
def list_runs(run_ids):
341-
"""List runs by their ID.
342-
343-
Perform API call `/run/list/run/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_run_ids>`_
344-
345-
Parameters
346-
----------
347-
run_id : int or list
348-
349-
Returns
350-
-------
351-
list
352-
List of found runs.
353-
"""
354-
return _list_runs_by(run_ids, 'run')
355-
356-
357-
def list_runs_by_task(task_id):
358-
"""List runs by task.
359-
360-
Perform API call `/run/list/task/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_task_ids>`_
361-
362-
Parameters
363-
----------
364-
task_id : int or list
365-
366-
Returns
367-
-------
368-
list
369-
List of found runs.
370-
"""
371-
return _list_runs_by(task_id, 'task')
372-
373-
374-
def list_runs_by_flow(flow_id):
375-
"""List runs by flow.
376-
377-
Perform API call `/run/list/flow/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_flow_ids>`_
378-
379-
Parameters
380-
----------
381-
flow_id : int or list
382-
383-
Returns
384-
-------
385-
list
386-
List of found runs.
387-
"""
388-
return _list_runs_by(flow_id, 'flow')
389-
390-
391-
def list_runs_by_uploader(uploader_id):
392-
"""List runs by uploader.
393-
394-
Perform API call `/run/list/uploader/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_uploader_ids>`_
395-
396-
Parameters
397-
----------
398-
uploader_id : int or list
399-
400-
Returns
401-
-------
402-
list
403-
List of found runs.
404-
"""
405-
return _list_runs_by(uploader_id, 'uploader')
406-
407-
408-
def _list_runs_by(id_, by):
409-
"""Helper function to create API call strings.
410-
411-
Helper for the following api calls:
412-
413-
* http://www.openml.org/api_docs/#!/run/get_run_list_task_ids
414-
* http://www.openml.org/api_docs/#!/run/get_run_list_run_ids
415-
* http://www.openml.org/api_docs/#!/run/get_run_list_tag_tag
416-
* http://www.openml.org/api_docs/#!/run/get_run_list_uploader_ids
417-
* http://www.openml.org/api_docs/#!/run/get_run_list_flow_ids
418-
419-
All of these allow either an integer as ID or a list of integers. Their
420-
name follows the convention run/list/{by}/{id}
421-
422-
Parameters
423-
----------
424-
id_ : int or list
425-
426-
by : str
427-
428-
Returns
429-
-------
430-
list
431-
List of found runs.
432-
433-
"""
434-
435-
if isinstance(id_, list):
436-
for i in range(len(id_)):
437-
# Type checking to avoid bad calls to the server
438-
id_[i] = str(int(id_[i]))
439-
id_ = ','.join(id_)
440-
elif by == 'tag':
441-
pass
442-
else:
443-
id_ = int(id_)
444-
445-
api_call = "run/list"
446-
if by is not None:
447-
api_call += "/%s" % by
448-
api_call = "%s/%s" % (api_call, id_)
449-
return _list_runs(api_call)
450-
451-
452311
def _list_runs(api_call):
453312
"""Helper function to parse API calls which are lists of runs"""
454313

@@ -476,15 +335,15 @@ def _list_runs(api_call):
476335
else:
477336
raise TypeError()
478337

479-
runs = []
338+
runs = dict()
480339
for run_ in runs_list:
481-
run = {'run_id': int(run_['oml:run_id']),
340+
run_id = int(run_['oml:run_id'])
341+
run = {'run_id': run_id,
482342
'task_id': int(run_['oml:task_id']),
483343
'setup_id': int(run_['oml:setup_id']),
484344
'flow_id': int(run_['oml:flow_id']),
485345
'uploader': int(run_['oml:uploader'])}
486346

487-
runs.append(run)
488-
runs.sort(key=lambda t: t['run_id'])
347+
runs[run_id] = run
489348

490349
return runs

tests/entities/test_dataset.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ def test_get_data_with_target(self):
8686
X, y = self.dataset.get_data(target="class")
8787
self.assertIsInstance(X, np.ndarray)
8888
self.assertEqual(X.dtype, np.float32)
89-
self.assertEqual(y.dtype, np.int64)
89+
self.assertIn(y.dtype, [np.int32, np.int64])
9090
self.assertEqual(X.shape, (898, 38))
9191
X, y, attribute_names = self.dataset.get_data(
9292
target="class", return_attribute_names=True)
@@ -99,7 +99,7 @@ def test_get_sparse_dataset_with_target(self):
9999
self.assertIsInstance(X, np.ndarray)
100100
self.assertEqual(X.dtype, np.float32)
101101
self.assertIsInstance(y, np.ndarray)
102-
self.assertEqual(y.dtype, np.int64)
102+
self.assertIn(y.dtype, [np.int32, np.int64])
103103
self.assertEqual(X.shape, (2, 20000))
104104
X, y, attribute_names = self.sparse_dataset.get_data(
105105
target="class", return_attribute_names=True)
@@ -188,7 +188,7 @@ def test_get_data_rowid_and_ignore_and_target(self):
188188
X, y = self.dataset.get_data(target="class", include_row_id=False,
189189
include_ignore_attributes=False)
190190
self.assertEqual(X.dtype, np.float32)
191-
self.assertEqual(y.dtype, np.int64)
191+
self.assertIn(y.dtype, [np.int32, np.int64])
192192
self.assertEqual(X.shape, (898, 36))
193193
X, y, categorical = self.dataset.get_data(
194194
target="class", return_categorical_indicator=True)
@@ -205,7 +205,7 @@ def test_get_sparse_dataset_rowid_and_ignore_and_target(self):
205205
include_ignore_attributes=False)
206206
self.assertIsInstance(X, np.ndarray)
207207
self.assertEqual(X.dtype, np.float32)
208-
self.assertEqual(y.dtype, np.int64)
208+
self.assertIn(y.dtype, [np.int32, np.int64])
209209
self.assertEqual(X.shape, (2, 19998))
210210
X, y, categorical = self.sparse_dataset.get_data(
211211
target="class", return_categorical_indicator=True)

0 commit comments

Comments
 (0)