Skip to content

Commit 198b07e

Browse files
committed
Merge pull request #13 from mfeurer/master
Use new OpenML API
2 parents 6fb9ca7 + fd76818 commit 198b07e

7 files changed

Lines changed: 103 additions & 173 deletions

File tree

openml/apiconnector.py

Lines changed: 30 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@
2121
import xmltodict
2222

2323
from .entities.dataset import OpenMLDataset
24-
from .entities.task import Task
24+
from .entities.task import OpenMLTask
2525
from .entities.split import OpenMLSplit
2626
from .util import is_string
2727

2828
logger = logging.getLogger(__name__)
2929

30-
OPENML_URL = "http://www.openml.org"
30+
OPENML_URL = "http://api_new.openml.org/v1/"
3131

3232

3333
class OpenMLStatusChange(Warning):
@@ -50,10 +50,6 @@ def __init__(self, message):
5050
super(OpenMLServerError, self).__init__(message)
5151

5252

53-
class AuthentificationError(PyOpenMLError):
54-
def __init__(self, message):
55-
super(AuthentificationError, self).__init__(message)
56-
5753
class OpenMLCacheException(PyOpenMLError):
5854
def __init__(self, message):
5955
super(OpenMLCacheException, self).__init__(message)
@@ -77,14 +73,9 @@ class APIConnector(object):
7773
directory '.openml/cache' in the users home directory will be used.
7874
If either directory does not exist, it will be created.
7975
80-
username : string, optional (default=None)
81-
Your username.
82-
83-
password : string, optional (default=None)
84-
Your password. This will not be stored! Instead, the md5 hash is
85-
calculated and used to authenticate to the OpenML server,
86-
which returns a session key. This session key is the only credential
87-
which is stored.
76+
apikey : string, optional (default=None)
77+
Your OpenML API key which will be used to authenticate you at the OpenML
78+
server.
8879
8980
server : string, optional (default=None)
9081
The OpenML server to connect to.
@@ -110,24 +101,18 @@ class APIConnector(object):
110101
Raises
111102
------
112103
ValueError
113-
If one of the following variables is neither specified in the
114-
configuration file nor when creating the APIconnector class:
115-
username, password
116-
AuthentificationError
117-
If authentication at the OpenML server does not work.
104+
If apikey is neither specified in the config nor given as an argument.
118105
OpenMLServerError
119106
If the OpenML server returns an unexpected response.
120107
121108
Testing the API calls in Firefox
122109
--------------------------------
123-
With the Firefox AddOn HTTPRequestor, one can check the OpenML API calls
124-
which need authentication (by providing such). First, create the md5
125-
hash of your OpenML password and add it to
110+
With the Firefox AddOn HTTPRequestor, one can check the OpenML API calls.
126111
127112
"""
128-
def __init__(self, cache_directory=None, username=None, password=None,
113+
def __init__(self, cache_directory=None, apikey=None,
129114
server=None, verbosity=None, configure_logger=True,
130-
authenticate=True, private_directory=None):
115+
private_directory=None):
131116
# The .openml directory is necessary, just try to create it (EAFP)
132117
try:
133118
os.mkdir(os.path.expanduser('~/.openml'))
@@ -139,10 +124,8 @@ def __init__(self, cache_directory=None, username=None, password=None,
139124
self.config = self._parse_config()
140125
if cache_directory is not None:
141126
self.config.set('FAKE_SECTION', 'cachedir', cache_directory)
142-
if username is not None:
143-
self.config.set('FAKE_SECTION', 'username', username)
144-
if password is not None:
145-
self.config.set('FAKE_SECTION', 'password', password)
127+
if apikey is not None:
128+
self.config.set('FAKE_SECTION', 'apikey', apikey)
146129
if server is not None:
147130
self.config.set('FAKE_SECTION', 'server', server)
148131
if verbosity is not None:
@@ -162,11 +145,6 @@ def __init__(self, cache_directory=None, username=None, password=None,
162145
format='[%(levelname)s] [%(asctime)s:%(name)s] %('
163146
'message)s', datefmt='%H:%M:%S', level=level)
164147

165-
if authenticate:
166-
self._session_hash = self._authenticate(
167-
self.config.get('FAKE_SECTION', 'username'),
168-
self.config.get('FAKE_SECTION', 'password'))
169-
170148
# Set up the cache directories
171149
self.cache_dir = self.config.get('FAKE_SECTION', 'cachedir')
172150
self.dataset_cache_dir = os.path.join(self.cache_dir, "datasets")
@@ -187,50 +165,8 @@ def __init__(self, cache_directory=None, username=None, password=None,
187165
if not os.path.exists(dir_) and not os.path.isdir(dir_):
188166
os.mkdir(dir_)
189167

190-
def _authenticate(self, username, password):
191-
# Check the username
192-
if username is None:
193-
raise ValueError("No username specified.")
194-
elif not is_string(username):
195-
raise ValueError("Username must be of type string.")
196-
elif not username:
197-
raise ValueError("No value for argument username specified.")
198-
199-
# Check the password
200-
if password is None:
201-
raise ValueError("No password specified.")
202-
elif not is_string(password):
203-
raise ValueError("Password must be of type string.")
204-
elif not password:
205-
raise ValueError("No value for argument password specified.")
206-
207-
m = hashlib.md5()
208-
m.update(password.encode('utf-8'))
209-
md5 = m.hexdigest()
210-
211-
# TODO: catch possible exceptions
212-
data = {'username': username, 'password': md5}
213-
return_code, xml_string = self._perform_api_call(
214-
"openml.authenticate", data=data, add_authentication=False)
215-
216-
xml_dict = xmltodict.parse(xml_string)
217-
if xml_dict.get('oml:authenticate'):
218-
session_hash = xml_dict['oml:authenticate']['oml:session_hash']
219-
return session_hash
220-
elif xml_dict.get('oml:error'):
221-
error_code = xml_dict['oml:error']['oml:code']
222-
if error_code == '252':
223-
raise AuthentificationError(
224-
"Authentication failed. The username and password did not "
225-
"match any record in the database.")
226-
else:
227-
OpenMLServerError(
228-
"Unexpected server response code %d with response"
229-
" message %s" % (return_code, xml_string))
230-
231168
def _parse_config(self):
232-
defaults = {'username': '',
233-
'password': '',
169+
defaults = {'apikey': '',
234170
'server': OPENML_URL,
235171
'verbosity': 0,
236172
'cachedir': os.path.expanduser('~/.openml/cache'),
@@ -436,7 +372,7 @@ def get_dataset_list(self):
436372
these are also returned.
437373
"""
438374
# TODO add proper error handling here!
439-
return_code, xml_string = self._perform_api_call("openml.data")
375+
return_code, xml_string = self._perform_api_call("data/list/")
440376
datasets_dict = xmltodict.parse(xml_string)
441377

442378
# Minimalistic check if the XML is useful
@@ -551,7 +487,7 @@ def download_dataset_description(self, did):
551487
except (OpenMLCacheException):
552488
try:
553489
return_code, dataset_xml = self._perform_api_call(
554-
"openml.data.description", data_id=did)
490+
"data/%d" % did)
555491
except (URLError, UnicodeEncodeError) as e:
556492
# TODO logger.debug
557493
self._remove_dataset_chache_dir(did)
@@ -566,7 +502,7 @@ def download_dataset_description(self, did):
566502
"oml:data_set_description"]
567503
except Exception as e:
568504
# TODO logger.debug
569-
self._remove_dataset_chache_dir()
505+
self._remove_dataset_chache_dir(did)
570506
print("Dataset ID", did)
571507
raise e
572508

@@ -611,7 +547,7 @@ def download_dataset_features(self, did):
611547
except (OSError, IOError):
612548
try:
613549
return_code, features_xml = self._perform_api_call(
614-
"openml.data.features", data_id=did)
550+
"data/features/%d" % did)
615551
except (URLError, UnicodeEncodeError) as e:
616552
# TODO logger.debug
617553
print(e)
@@ -635,7 +571,7 @@ def download_dataset_qualities(self, did):
635571
qualities_file = os.path.join(did_cache_dir, "qualities.xml")
636572
try:
637573
return_code, qualities_xml = self._perform_api_call(
638-
"openml.data.qualities", data_id=did)
574+
"data/qualities/%d" % did)
639575
except (URLError, UnicodeEncodeError) as e:
640576
# TODO logger.debug
641577
print(e)
@@ -723,7 +659,7 @@ def get_task_list(self, task_type_id=1):
723659
"cast to an Integer.")
724660

725661
return_code, xml_string = self._perform_api_call(
726-
"openml.tasks", task_type_id=task_type_id)
662+
"task/list/%d" % task_type_id)
727663
tasks_dict = xmltodict.parse(xml_string)
728664
# Minimalistic check if the XML is useful
729665
assert tasks_dict['oml:tasks']['@xmlns:oml'] == \
@@ -773,7 +709,7 @@ def download_task(self, task_id):
773709

774710
try:
775711
return_code, task_xml = self._perform_api_call(
776-
"openml.task.search", task_id=task_id)
712+
"task/%d" % task_id)
777713
except (URLError, UnicodeEncodeError) as e:
778714
print(e)
779715
raise e
@@ -817,7 +753,7 @@ def _create_task_from_xml(self, xml):
817753
text = parameter.get("#text", "")
818754
estimation_parameters[name] = text
819755

820-
return Task(
756+
return OpenMLTask(
821757
dic["oml:task_id"], dic["oml:task_type"],
822758
inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
823759
inputs["source_data"]["oml:data_set"]["oml:target_feature"],
@@ -875,22 +811,18 @@ def _create_task_cache_dir(self, task_id):
875811
pass
876812
return task_cache_dir
877813

878-
def _perform_api_call(self, call, data=None, add_authentication=True,
879-
**kwargs):
814+
def _perform_api_call(self, call, data=None):
880815
"""Perform an API call at the OpenML server.
881816
882817
This method must be used by all other methods using the REST API.
883818
884819
Parameters
885820
----------
886821
call : str
887-
The API call. For example openml.task.search
822+
The API call. For example data/list
888823
data : dict (default=None)
889824
Dictionary containing data which will be sent to the OpenML
890825
server via a POST request.
891-
add_authentication : bool (default=True)
892-
DO NOT CHANGE THIS. The only method which should set this
893-
argument to False is `authenticate`.
894826
**kwargs
895827
Further arguments which are appended as GET arguments.
896828
@@ -901,21 +833,16 @@ def _perform_api_call(self, call, data=None, add_authentication=True,
901833
return_value : str
902834
Return value of the OpenML server
903835
"""
904-
# TODO: do input validation!
905-
url = self.config.get("FAKE_SECTION", "server") + "/api/?f="
906-
url += "" + call
907-
if kwargs:
908-
for key in kwargs:
909-
url += "&" + key + "=" + str(kwargs[key])
910-
# TODO logger.debug(url)
911-
return self._read_url(url, data=data,
912-
add_authentication=add_authentication)
913-
914-
def _read_url(self, url, add_authentication=False, data=None):
836+
url = self.config.get("FAKE_SECTION", "server")
837+
if not url.endswith("/"):
838+
url += "/"
839+
url += call
840+
return self._read_url(url, data=data)
841+
842+
def _read_url(self, url, data=None):
915843
if data is None:
916844
data = {}
917-
if add_authentication:
918-
data = {'session_hash': self._session_hash}
845+
data['session_hash'] = self.config.get('FAKE_SECTION', 'apikey')
919846
data = urlencode(data)
920847
data = data.encode('utf-8')
921848

openml/entities/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pickle
1010

1111

12-
class Task(object):
12+
class OpenMLTask(object):
1313
def __init__(self, task_id, task_type, data_set_id, target_feature,
1414
estimation_procedure_type, data_splits_url,
1515
estimation_parameters, evaluation_measure,cost_matrix, api_connector):

source/api.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,10 @@
55
APIs
66
****
77

8-
.. autoclass:: openml.apiconnector.APIConnector
8+
.. autoclass:: openml.apiconnector.APIConnector
9+
10+
.. autoclass:: openml.entities.dataset.OpenMLDataset
11+
12+
.. autoclass:: openml.entities.task.OpenMLTask
13+
14+
.. autoclass:: openml.entities.split.OpenMLSplit

source/progress.rst

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,34 +12,50 @@ API calls
1212
=============================================== =========== ====== =============== ========== =====================
1313
API call implemented tested properly tested loads json proper error handling
1414
=============================================== =========== ====== =============== ========== =====================
15-
authenticate yes yes
16-
authenticate.check
17-
data yes yes
18-
data.description yes yes
19-
data.upload
20-
data.delete
21-
data.licences
22-
data.features yes yes
23-
data.qualities yes yes
24-
data.qualities.list
25-
task yes yes
26-
task.types.search yes yes
27-
task.evaluations
28-
task.types
29-
estimationprocedure
30-
implementation.exists
31-
implementation.upload
32-
implementation.owned
33-
implementation.delete
34-
implementation.licences
35-
evaluation.measures
36-
run
37-
run.upload
38-
run.delete
39-
job
40-
setup
15+
/data/list/ yes yes
16+
/data/list/active/
17+
/data/list/tag/{tag}
18+
/data/{data_id} yes yes
19+
/data/delete/
20+
/data/upload/
21+
/data/features/{data_id} yes yes
22+
/data/features/upload/
23+
/data/qualities/{data_id} yes yes
24+
/data/qualities/list
25+
/data/qualities/upload
26+
/data/tag
27+
/data/untag
28+
/task/list yes yes
29+
/task/list/active
30+
/task/list/tag/{tag}
31+
/task/{task_id} yes yes
32+
/task/tag
33+
/task/untag
34+
/task/delete
35+
/tasktype/list
36+
/tasktype/{task_id}
37+
/flow/tag
38+
/flow/untag
39+
/flow/{flow_id}
40+
/flow/
41+
/flow/exists/{name,ext_version}
42+
/flow/owned
43+
/run/list
44+
/run/{run_id}
45+
/run
46+
/run/tag
47+
/run/untag
48+
/run/evaluate
49+
/run/reset
50+
/estimationprocedure/{proc_id}
51+
/estimationprocedure/list
52+
/evaluationmeasures/list
53+
/job/request/
4154
=============================================== =========== ====== =============== ========== =====================
4255

56+
This list does not contain the `/setup/` calls because we do not need them
57+
according to Jan.
58+
4359
Convenience Functions
4460
=====================
4561

0 commit comments

Comments
 (0)