Skip to content

Commit 2c5c214

Browse files
author
janvanrijn
committed
Merged the list-dataset functions into a single function with optional arguments.
Did the same for the list-task functions.
1 parent 81b7e84 commit 2c5c214

6 files changed

Lines changed: 70 additions & 172 deletions

File tree

openml/datasets/__init__.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
from .functions import (list_datasets, list_datasets_by_tag,
2-
check_datasets_active, get_datasets, get_dataset,
3-
list_datasets_paginate)
1+
from .functions import (list_datasets, check_datasets_active,
2+
get_datasets, get_dataset)
43
from .dataset import OpenMLDataset
54

65
__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
7-
'OpenMLDataset', 'list_datasets', 'list_datasets_by_tag',
8-
'list_datasets_paginate']
6+
'OpenMLDataset', 'list_datasets']

openml/datasets/functions.py

Lines changed: 25 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def _list_cached_datasets():
4242
dataset_directory_content = os.listdir(directory_name)
4343

4444
if "dataset.arff" in dataset_directory_content and \
45-
"description.xml" in dataset_directory_content:
45+
"description.xml" in dataset_directory_content:
4646
if dataset_id not in datasets:
4747
datasets.append(dataset_id)
4848

@@ -111,74 +111,51 @@ def _get_cached_dataset_arff(dataset_id):
111111
"cached" % dataset_id)
112112

113113

114-
def list_datasets():
114+
def list_datasets(offset=None, size=None, tag=None):
115115
"""Return a list of all datasets which are on OpenML.
116116
117-
Returns
118-
-------
119-
datasets : list of dicts
120-
A list of all datasets.
121-
122-
Every dataset is represented by a dictionary containing
123-
the following information:
124-
- dataset id
125-
- status
126-
127-
If qualities are calculated for the dataset, some of
128-
these are also returned.
129-
"""
130-
return _list_datasets("data/list")
131-
132-
133-
def list_datasets_paginate(offset,size):
134-
"""Return a partial list (of given size) dataset which are on OpenML, starting with offset.
135-
136117
Parameters
137118
----------
138119
offset : int
139120
the number of datasets to skip, starting from the first
140121
size : int
141122
the maximum number of datasets to show
123+
tag : str
124+
the tag to include
142125
143126
Returns
144127
-------
145128
datasets : list of dicts
146-
A partial list of datasets.
129+
A list of datasets having the given tag (if applicable).
147130
148131
Every dataset is represented by a dictionary containing
149132
the following information:
150133
- dataset id
151134
- status
152-
135+
153136
If qualities are calculated for the dataset, some of
154137
these are also returned.
155138
"""
156-
try:
157-
offset = int(offset)
158-
except:
159-
raise ValueError("Offset is neither an Integer nor can be "
160-
"cast to an Integer.")
161-
try:
162-
limit = int(size)
163-
except:
164-
raise ValueError("Size is neither an Integer nor can be "
165-
"cast to an Integer.")
166-
return _list_datasets("data/list/offset/%d/limit/%d" % (offset, size))
167-
168-
169-
def list_datasets_by_tag(tag):
170-
"""Return all datasets having the given tag.
139+
api_call = "data/list"
140+
if offset is not None:
141+
try:
142+
offset = int(offset)
143+
api_call += "/offset/%d" % offset
144+
except:
145+
raise ValueError("Offset is neither an Integer nor can be "
146+
"cast to an Integer.")
171147

172-
Returns
173-
-------
174-
datasets : list of dicts
175-
A list of all datasets having the given tag. Every dataset is
176-
represented by a dictionary containing the following information:
177-
dataset id, and status. If qualities are calculated for the dataset,
178-
some of these are also returned.
148+
if size is not None:
149+
try:
150+
size = int(size)
151+
api_call += "/limit/%d" % size
152+
except:
153+
raise ValueError("Size is neither an Integer nor can be "
154+
"cast to an Integer.")
155+
if tag is not None:
156+
api_call += "/tag/%s" % tag
179157

180-
"""
181-
return _list_datasets("data/list/%s" % tag)
158+
return _list_datasets(api_call)
182159

183160

184161
def _list_datasets(api_call):
@@ -190,7 +167,7 @@ def _list_datasets(api_call):
190167
assert type(datasets_dict['oml:data']['oml:dataset']) == list, \
191168
type(datasets_dict['oml:data'])
192169
assert datasets_dict['oml:data']['@xmlns:oml'] == \
193-
'http://openml.org/openml'
170+
'http://openml.org/openml'
194171

195172
datasets = []
196173
for dataset_ in datasets_dict['oml:data']['oml:dataset']:

openml/tasks/__init__.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
from .task import OpenMLTask
22
from .split import OpenMLSplit
3-
from .functions import (get_task, list_tasks, list_tasks_by_type,
4-
list_tasks_by_tag, list_tasks_paginate,
5-
list_tasks_by_type_paginate)
3+
from .functions import (get_task, list_tasks)
64

7-
__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'list_tasks_by_type',
8-
'list_tasks_by_tag', 'list_tasks_paginate', 'OpenMLSplit',
9-
'list_tasks_by_type_paginate']
5+
__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'OpenMLSplit']

openml/tasks/functions.py

Lines changed: 34 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -89,131 +89,58 @@ def _get_estimation_procedure_list():
8989
return procs
9090

9191

92-
def list_tasks_by_type(task_type_id):
93-
"""Return a list of all tasks for a given tasks type which are on OpenML.
92+
def list_tasks(task_type_id=None, offset=None, size=None, tag=None):
93+
"""Return a number of tasks having the given tag and task_type_id
9494
9595
Parameters
9696
----------
9797
task_type_id : int
9898
ID of the task type as detailed
9999
`here <http://www.openml.org/search?type=task_type>`_.
100-
101-
Returns
102-
-------
103-
list
104-
A list of all tasks of the given task type. Every task is represented by
105-
a dictionary containing the following information: task id,
106-
dataset id, task_type and status. If qualities are calculated for
107-
the associated dataset, some of these are also returned.
108-
"""
109-
try:
110-
task_type_id = int(task_type_id)
111-
except:
112-
raise ValueError("Task Type ID is neither an Integer nor can be "
113-
"cast to an Integer.")
114-
return _list_tasks("task/list/type/%d" % task_type_id)
115-
116-
117-
def list_tasks_paginate(offset,size):
118-
"""Return a partial list (of given size) tasks for a given tasks type, starting with offset.
119-
120-
Parameters
121-
----------
122100
offset : int
123101
the number of tasks to skip, starting from the first
124102
size : int
125103
the maximum number of tasks to show
126-
127-
Returns
128-
-------
129-
list
130-
A partial list of tasks of the task type. Every task is represented by a
131-
dictionary containing the following information: task id,
132-
dataset id, task_type and status. If qualities are calculated for
133-
the associated dataset, some of these are also returned.
134-
"""
135-
try:
136-
offset = int(offset)
137-
except:
138-
raise ValueError("Offset is neither an Integer nor can be "
139-
"cast to an Integer.")
140-
try:
141-
size = int(size)
142-
except:
143-
raise ValueError("Size is neither an Integer nor can be "
144-
"cast to an Integer.")
145-
return _list_tasks("task/list/offset/%d/limit/%d" % (offset, size))
146-
147-
148-
def list_tasks_by_type_paginate(task_type_id,offset,size):
149-
"""Return a partial list (of given size) tasks, starting with offset.
150-
151-
Parameters
152-
----------
153-
task_type_id : int
154-
ID of the task type as detailed
155-
`here <http://www.openml.org/search?type=task_type>`_.
156-
offset : int
157-
the number of tasks to skip, starting from the first
158-
size : int
159-
the maximum number of tasks to show
160-
161-
Returns
162-
-------
163-
list
164-
A partial list of tasks. Every task is represented by a
165-
dictionary containing the following information: task id,
166-
dataset id, task_type and status. If qualities are calculated for
167-
the associated dataset, some of these are also returned.
168-
"""
169-
try:
170-
task_type_id = int(task_type_id)
171-
except:
172-
raise ValueError("Task Type ID is neither an Integer nor can be "
173-
"cast to an Integer.")
174-
try:
175-
offset = int(offset)
176-
except:
177-
raise ValueError("Offset is neither an Integer nor can be "
178-
"cast to an Integer.")
179-
try:
180-
size = int(size)
181-
except:
182-
raise ValueError("Size is neither an Integer nor can be "
183-
"cast to an Integer.")
184-
return _list_tasks("task/list/type/%d/offset/%d/limit/%d" % (task_type_id,offset, size))
185-
186-
187-
def list_tasks_by_tag(tag):
188-
"""Return all tasks having the given tag
189-
190-
Parameters
191-
----------
192104
tag : str
105+
the tag to include
193106
194107
Returns
195108
-------
196109
list
197-
A list of all tasks having a give tag. Every task is represented by
198-
a dictionary containing the following information: task id,
199-
dataset id, task_type and status. If qualities are calculated for
200-
the associated dataset, some of these are also returned.
110+
A list of all tasks having the given task_type_id and the given tag.
111+
Every task is represented by a dictionary containing the following
112+
information: task id, dataset id, task_type and status. If qualities
113+
are calculated for the associated dataset, some of these are also
114+
returned.
201115
"""
202-
return _list_tasks("task/list/tag/%s" % tag)
203-
116+
api_call = "task/list"
117+
if task_type_id is not None:
118+
try:
119+
task_type_id = int(task_type_id)
120+
api_call += "/task_type_id/%d" % task_type_id
121+
except:
122+
raise ValueError("Task_type_id is neither an Integer nor can be "
123+
"cast to an Integer.")
204124

205-
def list_tasks():
206-
"""Return a list of all tasks which are on OpenML.
125+
if offset is not None:
126+
try:
127+
offset = int(offset)
128+
api_call += "/offset/%d" % offset
129+
except:
130+
raise ValueError("Offset is neither an Integer nor can be "
131+
"cast to an Integer.")
207132

208-
Returns
209-
-------
210-
list
211-
A list of all tasks. Every task is represented by a
212-
dictionary containing the following information: task id,
213-
dataset id, task_type and status. If qualities are calculated for
214-
the associated dataset, some of these are also returned.
215-
"""
216-
return _list_tasks('task/list')
133+
if size is not None:
134+
try:
135+
size = int(size)
136+
api_call += "/limit/%d" % size
137+
except:
138+
raise ValueError("Size is neither an Integer nor can be "
139+
"cast to an Integer.")
140+
if tag is not None:
141+
api_call += "/tag/%s" % tag
142+
143+
return _list_tasks(api_call)
217144

218145

219146
def _list_tasks(api_call):

tests/datasets/test_datasets.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def test_list_datasets(self):
8888
'deactivated'])
8989

9090
def test_list_datasets_by_tag(self):
91-
datasets = openml.datasets.list_datasets_by_tag('uci')
91+
datasets = openml.datasets.list_datasets(tag='uci')
9292
self.assertGreaterEqual(len(datasets), 5)
9393
for dataset in datasets:
9494
self.assertEqual(type(dataset), dict)
@@ -104,7 +104,7 @@ def test_list_datasets_paginate(self):
104104
size = 10
105105
max = 100
106106
for i in range(0, max, size):
107-
data = openml.datasets.list_datasets_paginate(i, size)
107+
data = openml.datasets.list_datasets(offset=i, size=size)
108108
self.assertGreaterEqual(size, len(data))
109109
for dataset in data:
110110
self.assertEqual(type(dataset), dict)

tests/tasks/test_task_functions.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,13 @@ def _check_task(self, task):
5151
['in_preparation', 'active', 'deactivated'])
5252

5353
def test_list_tasks_by_type(self):
54-
tasks = openml.tasks.list_tasks_by_type(task_type_id=3)
54+
tasks = openml.tasks.list_tasks(task_type_id=3)
5555
self.assertGreaterEqual(len(tasks), 300)
5656
for task in tasks:
5757
self._check_task(task)
5858

5959
def test_list_tasks_by_tag(self):
60-
tasks = openml.tasks.list_tasks_by_tag('basic')
60+
tasks = openml.tasks.list_tasks(tag='basic')
6161
self.assertGreaterEqual(len(tasks), 57)
6262
for task in tasks:
6363
self._check_task(task)
@@ -72,7 +72,7 @@ def test_list_tasks_paginate(self):
7272
size = 10
7373
max = 100
7474
for i in range(0, max, size):
75-
tasks = openml.tasks.list_tasks_paginate(i, size)
75+
tasks = openml.tasks.list_tasks(offset=i, size=size)
7676
self.assertGreaterEqual(size, len(tasks))
7777
for task in tasks:
7878
self.assertEqual(type(task), dict)
@@ -91,7 +91,7 @@ def test_list_tasks_per_type_paginate(self):
9191
task_types = 5
9292
for j in range(1,task_types):
9393
for i in range(0, max, size):
94-
tasks = openml.tasks.list_tasks_by_type_paginate(j, i, size)
94+
tasks = openml.tasks.list_tasks(task_type_id=j, offset=i, size=size)
9595
self.assertGreaterEqual(size, len(tasks))
9696
for task in tasks:
9797
self.assertEqual(type(task), dict)

0 commit comments

Comments
 (0)