Skip to content

Commit d05790b

Browse files
authored
Merge pull request #355 from amueller/tagging
WIP dataset Tagging
2 parents 6b22bb6 + 96a850b commit d05790b

6 files changed

Lines changed: 62 additions & 16 deletions

File tree

openml/_api_calls.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import xmltodict
88

99
from . import config
10-
from .exceptions import OpenMLServerError, OpenMLServerException
10+
from .exceptions import (OpenMLServerError, OpenMLServerException,
11+
OpenMLServerNoResult)
1112

1213

1314
def _perform_api_call(call, data=None, file_dictionary=None,
@@ -138,4 +139,6 @@ def _parse_server_exception(response):
138139
additional = None
139140
if 'oml:additional_information' in server_exception['oml:error']:
140141
additional = server_exception['oml:error']['oml:additional_information']
142+
if code in [370, 372]:
143+
return OpenMLServerNoResult(code, message, additional)
141144
return OpenMLServerException(code, message, additional)

openml/datasets/dataset.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import logging
44
import os
55
import six
6-
import sys
76

87
import arff
98

@@ -26,9 +25,9 @@ class OpenMLDataset(object):
2625
2726
Parameters
2827
----------
29-
name : string
28+
name : str
3029
Name of the dataset
31-
description : string
30+
description : str
3231
Description of the dataset
3332
FIXME : which of these do we actually need?
3433
"""
@@ -82,7 +81,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
8281
feature = OpenMLDataFeature(int(xmlfeature['oml:index']),
8382
xmlfeature['oml:name'],
8483
xmlfeature['oml:data_type'],
85-
None, #todo add nominal values (currently not in database)
84+
None, # todo add nominal values (currently not in database)
8685
int(xmlfeature.get('oml:number_of_missing_values', 0)))
8786
if idx != feature.index:
8887
raise ValueError('Data features not provided in right order')
@@ -124,6 +123,28 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
124123
logger.debug("Saved dataset %d: %s to file %s" %
125124
(self.dataset_id, self.name, self.data_pickle_file))
126125

126+
def push_tag(self, tag):
127+
"""Annotates this data set with a tag on the server.
128+
129+
Parameters
130+
----------
131+
tag : str
132+
Tag to attach to the dataset.
133+
"""
134+
data = {'data_id': self.dataset_id, 'tag': tag}
135+
_perform_api_call("/data/tag", data=data)
136+
137+
def remove_tag(self, tag):
138+
"""Removes a tag from this dataset on the server.
139+
140+
Parameters
141+
----------
142+
tag : str
143+
Tag to remove from the dataset.
144+
"""
145+
data = {'data_id': self.dataset_id, 'tag': tag}
146+
_perform_api_call("/data/untag", data=data)
147+
127148
def __eq__(self, other):
128149
if type(other) != OpenMLDataset:
129150
return False
@@ -310,7 +331,6 @@ def retrieve_class_labels(self, target_name='class'):
310331
else:
311332
return None
312333

313-
314334
def get_features_by_type(self, data_type, exclude=None,
315335
exclude_ignore_attributes=True,
316336
exclude_row_id_attribute=True):
@@ -372,11 +392,7 @@ def publish(self):
372392
373393
Returns
374394
-------
375-
return_code : int
376-
Return code from server
377-
378-
return_value : string
379-
xml return from server
395+
self
380396
"""
381397

382398
file_elements = {'description': self._to_xml()}
@@ -396,7 +412,7 @@ def _to_xml(self):
396412
397413
Returns
398414
-------
399-
xml_dataset : string
415+
xml_dataset : str
400416
XML description of the data.
401417
"""
402418
xml_dataset = ('<oml:data_set_description '

openml/datasets/functions.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import xmltodict
99

1010
from .dataset import OpenMLDataset
11-
from ..exceptions import OpenMLCacheException
11+
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
1212
from .. import config
1313
from .._api_calls import _perform_api_call, _read_url
1414

@@ -178,7 +178,10 @@ def list_datasets(offset=None, size=None, tag=None):
178178

179179
def _list_datasets(api_call):
180180
# TODO add proper error handling here!
181-
xml_string = _perform_api_call(api_call)
181+
try:
182+
xml_string = _perform_api_call(api_call)
183+
except OpenMLServerNoResult:
184+
return []
182185
datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))
183186

184187
# Minimalistic check if the XML is useful

openml/exceptions.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class OpenMLServerError(PyOpenMLError):
1111
def __init__(self, message):
1212
super(OpenMLServerError, self).__init__(message)
1313

14-
#
14+
1515
class OpenMLServerException(OpenMLServerError):
1616
"""exception for when the result of the server was
1717
not 200 (e.g., listing call w/o results). """
@@ -22,6 +22,11 @@ def __init__(self, code, message, additional=None):
2222
super(OpenMLServerException, self).__init__(message)
2323

2424

25+
class OpenMLServerNoResult(OpenMLServerException):
26+
"""exception for when the result of the server is empty. """
27+
pass
28+
29+
2530
class OpenMLCacheException(PyOpenMLError):
2631
"""Dataset / task etc not found in cache"""
2732
def __init__(self, message):

tests/test_datasets/test_dataset.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
from scipy import sparse
33
import six
4+
from time import time
45

56
from openml.testing import TestBase
67
import openml
@@ -90,6 +91,25 @@ def test_get_data_with_ignore_attributes(self):
9091
# TODO test multiple ignore attributes!
9192

9293

94+
class OpenMLDatasetTestOnTestServer(TestBase):
95+
def setUp(self):
96+
super(OpenMLDatasetTestOnTestServer, self).setUp()
97+
# longley, really small dataset
98+
self.dataset = openml.datasets.get_dataset(125)
99+
100+
def test_tagging(self):
101+
tag = "testing_tag_{}_{}".format(self.id(), time())
102+
ds_list = openml.datasets.list_datasets(tag=tag)
103+
self.assertEqual(len(ds_list), 0)
104+
self.dataset.push_tag(tag)
105+
ds_list = openml.datasets.list_datasets(tag=tag)
106+
self.assertEqual(len(ds_list), 1)
107+
self.assertIn(125, ds_list)
108+
self.dataset.remove_tag(tag)
109+
ds_list = openml.datasets.list_datasets(tag=tag)
110+
self.assertEqual(len(ds_list), 0)
111+
112+
93113
class OpenMLDatasetTestSparse(TestBase):
94114
_multiprocess_can_split_ = True
95115

tests/test_datasets/test_dataset_functions.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import unittest
22
import os
3-
import os
43
import sys
54

65
if sys.version_info[0] >= 3:

0 commit comments

Comments
 (0)