Skip to content

Commit c3a3766

Browse files
committed
ADD /data/list/tag/{tag} api call
1 parent a3ba1b0 commit c3a3766

4 files changed

Lines changed: 38 additions & 6 deletions

File tree

openml/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def _setup():
3535
private_dir = config.get('FAKE_SECTION', 'private_directory')
3636
cache_dir = config.get('FAKE_SECTION', 'cachedir')
3737
set_cache_directory(cache_dir, private_dir)
38-
print(config)
3938

4039

4140
def set_cache_directory(cachedir, privatedir):

openml/datasets/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
from .functions import (list_datasets, check_datasets_active,
2-
get_datasets, get_dataset,
1+
from .functions import (list_datasets, list_datasets_by_tag,
2+
check_datasets_active, get_datasets, get_dataset,
33
get_dataset_description,
44
get_dataset_features, get_dataset_qualities)
55
from .dataset import OpenMLDataset
66

77
# Public API of the datasets subpackage. Each name is listed exactly once and
# corresponds to a name imported into this module. 'get_datasets_arf' was
# dropped because it is not imported here, so listing it made
# `from openml.datasets import *` fail with AttributeError.
__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
           'get_dataset_features', 'get_dataset_qualities',
           'OpenMLDataset', 'list_datasets', 'list_datasets_by_tag',
           'get_dataset_description']

openml/datasets/functions.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,33 @@ def list_datasets():
110110
111111
Returns
112112
-------
113-
datasets : list
113+
list
114114
A list of all datasets. Every dataset is represented by a
115115
dictionary containing the following information: dataset id,
116116
and status. If qualities are calculated for the dataset, some of
117117
these are also returned.
118118
"""
119+
return _list_datasets("data/list")
120+
121+
122+
def list_datasets_by_tag(tag):
    """Return all datasets having the given tag.

    Parameters
    ----------
    tag : str
        Tag to filter by. It is interpolated into the
        ``data/list/tag/{tag}`` API call.

    Returns
    -------
    list
        A list of all datasets having the given tag. Every dataset is
        represented by a dictionary containing the following information:
        dataset id, and status. If qualities are calculated for the dataset,
        some of these are also returned.

    """
    # The tag filter lives under the "tag/" sub-path of the listing endpoint;
    # without it the tag would be sent as a direct child of "data/list".
    return _list_datasets("data/list/tag/%s" % tag)
135+
136+
137+
def _list_datasets(api_call):
119138
# TODO add proper error handling here!
120-
return_code, xml_string = _perform_api_call("data/list/")
139+
return_code, xml_string = _perform_api_call(api_call)
121140
datasets_dict = xmltodict.parse(xml_string)
122141

123142
# Minimalistic check if the XML is useful

tests/test_datasets.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,20 @@ def test_list_datasets(self):
5353
# data from the internet...
5454
datasets = openml.datasets.list_datasets()
5555
# 1087 as the number of datasets on openml.org
56-
self.assertTrue(len(datasets) >= 1087)
56+
self.assertGreaterEqual(len(datasets), 1087)
57+
for dataset in datasets:
58+
self.assertEqual(type(dataset), dict)
59+
self.assertGreaterEqual(len(dataset), 2)
60+
self.assertIn('did', dataset)
61+
self.assertIsInstance(dataset['did'], int)
62+
self.assertIn('status', dataset)
63+
self.assertTrue(is_string(dataset['status']))
64+
self.assertIn(dataset['status'], ['in_preparation', 'active',
65+
'deactivated'])
66+
67+
def test_list_datasets_by_tag(self):
68+
datasets = openml.datasets.list_datasets_by_tag('uci')
69+
self.assertGreaterEqual(len(datasets), 5)
5770
for dataset in datasets:
5871
self.assertEqual(type(dataset), dict)
5972
self.assertGreaterEqual(len(dataset), 2)

0 commit comments

Comments
 (0)