Skip to content

Commit 755c52b

Browse files
committed
Merge pull request #98 from openml/features/finish_api
Features/finish api
2 parents a3ba1b0 + e264c9e commit 755c52b

9 files changed

Lines changed: 126 additions & 41 deletions

File tree

doc/progress.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@ API call implemented tested properly test
1616
/data/features/{id} yes yes
1717
/data/qualities/{id} yes yes
1818
/data/list/ yes yes
19-
/data/list/tag/{tag}
19+
/data/list/tag/{tag} yes yes
2020
/data/upload/ yes yes
2121
/data/tag
2222
/data/untag
2323
/data/delete/ X
2424

2525
/task/{task} yes yes
2626
/task/list yes yes
27-
/task/list/type/{id}
28-
/task/list/tag/{tag}
27+
/task/list/type/{id} yes yes
28+
/task/list/tag/{tag} yes yes
2929
/task {POST}
3030
/task/tag
3131
/task/untag

openml/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def _setup():
3535
private_dir = config.get('FAKE_SECTION', 'private_directory')
3636
cache_dir = config.get('FAKE_SECTION', 'cachedir')
3737
set_cache_directory(cache_dir, private_dir)
38-
print(config)
3938

4039

4140
def set_cache_directory(cachedir, privatedir):

openml/datasets/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
from .functions import (list_datasets, check_datasets_active,
2-
get_datasets, get_dataset,
1+
from .functions import (list_datasets, list_datasets_by_tag,
2+
check_datasets_active, get_datasets, get_dataset,
33
get_dataset_description,
44
get_dataset_features, get_dataset_qualities)
55
from .dataset import OpenMLDataset
66

77
__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
88
'get_datasets_arf', 'get_dataset_features',
99
'get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
10+
'list_datasets_by_tag',
1011
'get_dataset_description', 'list_datasets']

openml/datasets/functions.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,33 @@ def list_datasets():
110110
111111
Returns
112112
-------
113-
datasets : list
113+
list
114114
A list of all datasets. Every dataset is represented by a
115115
dictionary containing the following information: dataset id,
116116
and status. If qualities are calculated for the dataset, some of
117117
these are also returned.
118118
"""
119+
return _list_datasets("data/list")
120+
121+
122+
def list_datasets_by_tag(tag):
123+
"""Return all datasets having the given tag.
124+
125+
Returns
126+
-------
127+
list
128+
A list of all datasets having the given tag. Every dataset is
129+
represented by a dictionary containing the following information:
130+
dataset id, and status. If qualities are calculated for the dataset,
131+
some of these are also returned.
132+
133+
"""
134+
return _list_datasets("data/list/%s" % tag)
135+
136+
137+
def _list_datasets(api_call):
119138
# TODO add proper error handling here!
120-
return_code, xml_string = _perform_api_call("data/list/")
139+
return_code, xml_string = _perform_api_call(api_call)
121140
datasets_dict = xmltodict.parse(xml_string)
122141

123142
# Minimalistic check if the XML is useful

openml/tasks/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from .task import OpenMLTask
22
from .split import OpenMLSplit
3-
from .task_functions import get_task, list_tasks
3+
from .task_functions import get_task, list_tasks, list_tasks_by_type, \
4+
list_tasks_by_tag
45

5-
__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'OpenMLSplit']
6+
__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'list_tasks_by_type',
7+
'list_tasks_by_tag', 'OpenMLSplit']

openml/tasks/task_functions.py

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,20 +81,20 @@ def get_estimation_procedure_list():
8181
return procs
8282

8383

84-
def list_tasks(task_type_id=1):
85-
"""Return a list of all tasks which are on OpenML.
84+
def list_tasks_by_type(task_type_id):
85+
"""Return a list of all tasks for a given task type which are on OpenML.
8686
8787
Parameters
8888
----------
8989
task_type_id : int
9090
ID of the task type as detailed
91-
`here <http://openml.org/api/?f=openml.task.types>`_.
91+
`here <http://www.openml.org/search?type=task_type>`_.
9292
9393
Returns
9494
-------
95-
tasks : list
96-
A list of all tasks. Every task is represented by a
97-
dictionary containing the following information: task id,
95+
list
96+
A list of all tasks of the given task type. Every task is represented by
97+
a dictionary containing the following information: task id,
9898
dataset id, task_type and status. If qualities are calculated for
9999
the associated dataset, some of these are also returned.
100100
"""
@@ -103,14 +103,57 @@ def list_tasks(task_type_id=1):
103103
except:
104104
raise ValueError("Task Type ID is neither an Integer nor can be "
105105
"cast to an Integer.")
106+
return _list_tasks("task/list/type/%d" % task_type_id)
106107

107-
return_code, xml_string = _perform_api_call(
108-
"task/list/type/%d" % task_type_id)
108+
109+
def list_tasks_by_tag(tag):
110+
"""Return all tasks having the given tag.
111+
112+
Parameters
113+
----------
114+
tag : str
115+
116+
Returns
117+
-------
118+
list
119+
A list of all tasks having the given tag. Every task is represented by
120+
a dictionary containing the following information: task id,
121+
dataset id, task_type and status. If qualities are calculated for
122+
the associated dataset, some of these are also returned.
123+
"""
124+
return _list_tasks("task/list/tag/%s" % tag)
125+
126+
127+
def list_tasks():
128+
"""Return a list of all tasks which are on OpenML.
129+
130+
Returns
131+
-------
132+
list
133+
A list of all tasks. Every task is represented by a
134+
dictionary containing the following information: task id,
135+
dataset id, task_type and status. If qualities are calculated for
136+
the associated dataset, some of these are also returned.
137+
"""
138+
return _list_tasks('task/list')
139+
140+
141+
def _list_tasks(api_call):
142+
return_code, xml_string = _perform_api_call(api_call)
109143
tasks_dict = xmltodict.parse(xml_string)
110144
# Minimalistic check if the XML is useful
111-
assert tasks_dict['oml:tasks']['@xmlns:oml'] == \
112-
'http://openml.org/openml'
113-
assert type(tasks_dict['oml:tasks']['oml:task']) == list
145+
if 'oml:tasks' not in tasks_dict:
146+
raise ValueError('Error in return XML, does not contain "oml:runs": %s'
147+
% str(tasks_dict))
148+
elif '@xmlns:oml' not in tasks_dict['oml:tasks']:
149+
raise ValueError('Error in return XML, does not contain '
150+
'"oml:runs"/@xmlns:oml: %s'
151+
% str(tasks_dict))
152+
elif tasks_dict['oml:tasks']['@xmlns:oml'] != 'http://openml.org/openml':
153+
raise ValueError('Error in return XML, value of '
154+
'"oml:runs"/@xmlns:oml is not '
155+
'"http://openml.org/openml": %s'
156+
% str(tasks_dict))
114157

115158
tasks = []
116159
procs = get_estimation_procedure_list()
@@ -127,7 +170,8 @@ def list_tasks(task_type_id=1):
127170
if input['@name'] == 'estimation_procedure':
128171
task[input['@name']] = proc_dict[int(input['#text'])]['name']
129172
else:
130-
task[input['@name']] = input['#text']
173+
value = input.get('#text')
174+
task[input['@name']] = value
131175

132176
task[input['@name']] = input['#text']
133177

tests/examples/test_OpenMLDemo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_notebook(self):
4545
msg = 'Error executing the notebook "%s". ' % notebook_filename
4646
msg += 'See notebook "%s" for the traceback.\n\n' % notebook_filename_out
4747
msg += e.traceback
48-
self.fail(msg)
48+
self.fail(msg)
4949
finally:
5050
with open(notebook_filename_out, mode='wt') as f:
5151
nbformat.write(nb, f)

tests/test_datasets.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,20 @@ def test_list_datasets(self):
5353
# data from the internet...
5454
datasets = openml.datasets.list_datasets()
5555
# 1087 as the number of datasets on openml.org
56-
self.assertTrue(len(datasets) >= 1087)
56+
self.assertGreaterEqual(len(datasets), 1087)
57+
for dataset in datasets:
58+
self.assertEqual(type(dataset), dict)
59+
self.assertGreaterEqual(len(dataset), 2)
60+
self.assertIn('did', dataset)
61+
self.assertIsInstance(dataset['did'], int)
62+
self.assertIn('status', dataset)
63+
self.assertTrue(is_string(dataset['status']))
64+
self.assertIn(dataset['status'], ['in_preparation', 'active',
65+
'deactivated'])
66+
67+
def test_list_datasets_by_tag(self):
68+
datasets = openml.datasets.list_datasets_by_tag('uci')
69+
self.assertGreaterEqual(len(datasets), 5)
5770
for dataset in datasets:
5871
self.assertEqual(type(dataset), dict)
5972
self.assertGreaterEqual(len(dataset), 2)

tests/test_task.py

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,36 @@
77

88

99
class TestTask(TestBase):
10+
def _check_task(self, task):
11+
self.assertEqual(type(task), dict)
12+
self.assertGreaterEqual(len(task), 2)
13+
self.assertIn('did', task)
14+
self.assertIsInstance(task['did'], int)
15+
self.assertIn('status', task)
16+
self.assertTrue(is_string(task['status']))
17+
self.assertIn(task['status'],
18+
['in_preparation', 'active', 'deactivated'])
19+
1020
def test_list_tasks(self):
11-
# We can only perform a smoke test here because we test on dynamic
12-
# data from the internet...
13-
def check_task(task):
14-
self.assertEqual(type(task), dict)
15-
self.assertGreaterEqual(len(task), 2)
16-
self.assertIn('did', task)
17-
self.assertIsInstance(task['did'], int)
18-
self.assertIn('status', task)
19-
self.assertTrue(is_string(task['status']))
20-
self.assertIn(task['status'],
21-
['in_preparation', 'active', 'deactivated'])
22-
23-
# use a small task type as we cant limit tasks.
24-
# TODO inspect the tasks maybe?
25-
tasks = openml.tasks.list_tasks(task_type_id=3)
21+
tasks = openml.tasks.list_tasks()
22+
self.assertGreaterEqual(len(tasks), 2000)
23+
for task in tasks:
24+
self._check_task(task)
25+
26+
def test_list_tasks_by_type(self):
27+
tasks = openml.tasks.list_tasks_by_type(task_type_id=3)
2628
self.assertGreaterEqual(len(tasks), 300)
2729
for task in tasks:
28-
check_task(task)
30+
self._check_task(task)
31+
32+
def test_list_tasks_by_tag(self):
33+
tasks = openml.tasks.list_tasks_by_tag('basic')
34+
self.assertGreaterEqual(len(tasks), 57)
35+
for task in tasks:
36+
self._check_task(task)
2937

3038
def test_get_task(self):
3139
task = openml.tasks.get_task(1)
32-
print(task)
3340
self.assertTrue(os.path.exists(
3441
os.path.join(os.getcwd(), "tasks", "1", "task.xml")))
3542
self.assertTrue(os.path.exists(

0 commit comments

Comments
 (0)