Skip to content

Commit 0e61650

Browse files
committed
Merge
2 parents 2bfe552 + 198b07e commit 0e61650

12 files changed

Lines changed: 247 additions & 249 deletions

File tree

tests/.travis.yml renamed to .travis.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,7 @@ install:
2626
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy==0.13.3 matplotlib setuptools nose
2727
- source activate test-environment
2828
- python setup.py install
29-
- pip install xmltodict
30-
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
31-
pip install mock
32-
fi
33-
- python setup.py install
29+
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then pip install mock; fi
3430

3531
# command to run tests, e.g. python setup.py test
3632
script: python setup.py test

openml/apiconnector.py

Lines changed: 47 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@
2323
import xmltodict
2424

2525
from .entities.dataset import OpenMLDataset
26-
from .entities.task import Task
26+
from .entities.task import OpenMLTask
2727
from .entities.split import OpenMLSplit
2828
from .util import is_string
2929

3030
logger = logging.getLogger(__name__)
3131

32-
OPENML_URL = "http://www.openml.org"
32+
OPENML_URL = "http://api_new.openml.org/v1/"
3333

3434

3535
class OpenMLStatusChange(Warning):
@@ -52,10 +52,6 @@ def __init__(self, message):
5252
super(OpenMLServerError, self).__init__(message)
5353

5454

55-
class AuthentificationError(PyOpenMLError):
56-
def __init__(self, message):
57-
super(AuthentificationError, self).__init__(message)
58-
5955
class OpenMLCacheException(PyOpenMLError):
6056
def __init__(self, message):
6157
super(OpenMLCacheException, self).__init__(message)
@@ -79,14 +75,9 @@ class APIConnector(object):
7975
directory '.openml/cache' in the user's home directory will be used.
8076
If either directory does not exist, it will be created.
8177
82-
username : string, optional (default=None)
83-
Your username.
84-
85-
password : string, optional (default=None)
86-
Your passwort. This will not be stored! Instead, the md5 hash is
87-
calculated and used to authenticate to the OpenML server,
88-
which returns a session key. This session key is the only credential
89-
which is stored.
78+
apikey : string, optional (default=None)
79+
Your OpenML API key which will be used to authenticate you at the OpenML
80+
server.
9081
9182
server : string, optional (default=None)
9283
The OpenML server to connect to.
@@ -112,24 +103,18 @@ class APIConnector(object):
112103
Raises
113104
------
114105
ValueError
115-
If one of the following variables is neither specified in the
116-
configuration file nor when creating the APIconnector class:
117-
username, password
118-
AuthentificationError
119-
If authentification at the OpenML server does not work.
106+
If apikey is neither specified in the config nor given as an argument.
120107
OpenMLServerError
121108
If the OpenML server returns an unexpected response.
122109
123110
Testing the API calls in Firefox
124111
--------------------------------
125-
With the Firefox AddOn HTTPRequestor, one can check the OpenML API calls
126-
which need authentification (by providing such). First, create the md5
127-
hash of your OpenML password and add it to
112+
With the Firefox AddOn HTTPRequestor, one can check the OpenML API calls.
128113
129114
"""
130-
def __init__(self, cache_directory=None, username=None, password=None,
115+
def __init__(self, cache_directory=None, apikey=None,
131116
server=None, verbosity=None, configure_logger=True,
132-
authenticate=True, private_directory=None):
117+
private_directory=None):
133118
# The .openml directory is necessary, just try to create it (EAFP)
134119
try:
135120
os.mkdir(os.path.expanduser('~/.openml'))
@@ -141,10 +126,8 @@ def __init__(self, cache_directory=None, username=None, password=None,
141126
self.config = self._parse_config()
142127
if cache_directory is not None:
143128
self.config.set('FAKE_SECTION', 'cachedir', cache_directory)
144-
if username is not None:
145-
self.config.set('FAKE_SECTION', 'username', username)
146-
if password is not None:
147-
self.config.set('FAKE_SECTION', 'password', password)
129+
if apikey is not None:
130+
self.config.set('FAKE_SECTION', 'apikey', apikey)
148131
if server is not None:
149132
self.config.set('FAKE_SECTION', 'server', server)
150133
if verbosity is not None:
@@ -164,11 +147,6 @@ def __init__(self, cache_directory=None, username=None, password=None,
164147
format='[%(levelname)s] [%(asctime)s:%(name)s] %('
165148
'message)s', datefmt='%H:%M:%S', level=level)
166149

167-
if authenticate:
168-
self._session_hash = self._authenticate(
169-
self.config.get('FAKE_SECTION', 'username'),
170-
self.config.get('FAKE_SECTION', 'password'))
171-
172150
# Set up the cache directories
173151
self.cache_dir = self.config.get('FAKE_SECTION', 'cachedir')
174152
self.dataset_cache_dir = os.path.join(self.cache_dir, "datasets")
@@ -189,50 +167,8 @@ def __init__(self, cache_directory=None, username=None, password=None,
189167
if not os.path.exists(dir_) and not os.path.isdir(dir_):
190168
os.mkdir(dir_)
191169

192-
def _authenticate(self, username, password):
193-
# Check the username
194-
if username is None:
195-
raise ValueError("No username specified.")
196-
elif not is_string(username):
197-
raise ValueError("Username must be of type string.")
198-
elif not username:
199-
raise ValueError("No value for argument username specified.")
200-
201-
# Check the password
202-
if password is None:
203-
raise ValueError("No password specified.")
204-
elif not is_string(password):
205-
raise ValueError("Password must be of type string.")
206-
elif not password:
207-
raise ValueError("No value for argument password specified.")
208-
209-
m = hashlib.md5()
210-
m.update(password.encode('utf-8'))
211-
md5 = m.hexdigest()
212-
213-
# TODO: catch possible exceptions
214-
data = {'username': username, 'password': md5}
215-
return_code, xml_string = self._perform_api_call(
216-
"openml.authenticate", data=data, add_authentication=False)
217-
218-
xml_dict = xmltodict.parse(xml_string)
219-
if xml_dict.get('oml:authenticate'):
220-
session_hash = xml_dict['oml:authenticate']['oml:session_hash']
221-
return session_hash
222-
elif xml_dict.get('oml:error'):
223-
error_code = xml_dict['oml:error']['oml:code']
224-
if error_code == '252':
225-
raise AuthentificationError(
226-
"Authentication failed. The username and password did not "
227-
"match any record in the database.")
228-
else:
229-
OpenMLServerError(
230-
"Unexpected server response code %d with response"
231-
" message %s" % (return_code, xml_string))
232-
233170
def _parse_config(self):
234-
defaults = {'username': '',
235-
'password': '',
171+
defaults = {'apikey': '',
236172
'server': OPENML_URL,
237173
'verbosity': 0,
238174
'cachedir': os.path.expanduser('~/.openml/cache'),
@@ -438,7 +374,7 @@ def get_dataset_list(self):
438374
these are also returned.
439375
"""
440376
# TODO add proper error handling here!
441-
return_code, xml_string = self._perform_api_call("openml.data")
377+
return_code, xml_string = self._perform_api_call("data/list/")
442378
datasets_dict = xmltodict.parse(xml_string)
443379

444380
# Minimalistic check if the XML is useful
@@ -553,7 +489,7 @@ def download_dataset_description(self, did):
553489
except (OpenMLCacheException):
554490
try:
555491
return_code, dataset_xml = self._perform_api_call(
556-
"openml.data.description", data_id=did)
492+
"data/%d" % did)
557493
except (URLError, UnicodeEncodeError) as e:
558494
# TODO logger.debug
559495
self._remove_dataset_chache_dir(did)
@@ -568,7 +504,7 @@ def download_dataset_description(self, did):
568504
"oml:data_set_description"]
569505
except Exception as e:
570506
# TODO logger.debug
571-
self._remove_dataset_chache_dir()
507+
self._remove_dataset_chache_dir(did)
572508
print("Dataset ID", did)
573509
raise e
574510

@@ -613,7 +549,7 @@ def download_dataset_features(self, did):
613549
except (OSError, IOError):
614550
try:
615551
return_code, features_xml = self._perform_api_call(
616-
"openml.data.features", data_id=did)
552+
"data/features/%d" % did)
617553
except (URLError, UnicodeEncodeError) as e:
618554
# TODO logger.debug
619555
print(e)
@@ -637,7 +573,7 @@ def download_dataset_qualities(self, did):
637573
qualities_file = os.path.join(did_cache_dir, "qualities.xml")
638574
try:
639575
return_code, qualities_xml = self._perform_api_call(
640-
"openml.data.qualities", data_id=did)
576+
"data/qualities/%d" % did)
641577
except (URLError, UnicodeEncodeError) as e:
642578
# TODO logger.debug
643579
print(e)
@@ -725,7 +661,7 @@ def get_task_list(self, task_type_id=1):
725661
"cast to an Integer.")
726662

727663
return_code, xml_string = self._perform_api_call(
728-
"openml.tasks", task_type_id=task_type_id)
664+
"task/list/%d" % task_type_id)
729665
tasks_dict = xmltodict.parse(xml_string)
730666
# Minimalistic check if the XML is useful
731667
assert tasks_dict['oml:tasks']['@xmlns:oml'] == \
@@ -775,7 +711,7 @@ def download_task(self, task_id):
775711

776712
try:
777713
return_code, task_xml = self._perform_api_call(
778-
"openml.task.search", task_id=task_id)
714+
"task/%d" % task_id)
779715
except (URLError, UnicodeEncodeError) as e:
780716
print(e)
781717
raise e
@@ -819,7 +755,7 @@ def _create_task_from_xml(self, xml):
819755
text = parameter.get("#text", "")
820756
estimation_parameters[name] = text
821757

822-
return Task(
758+
return OpenMLTask(
823759
dic["oml:task_id"], dic["oml:task_type"],
824760
inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
825761
inputs["source_data"]["oml:data_set"]["oml:target_feature"],
@@ -878,21 +814,38 @@ def _create_task_cache_dir(self, task_id):
878814
return task_cache_dir
879815

880816
def _perform_api_call(self, call, data=None, filePath=None, add_authentication=True,
881-
**kwargs):
882-
# TODO: do input validation!
883-
url = self.config.get("FAKE_SECTION", "server") + "/api/?f="
884-
url += "" + call
885-
if kwargs:
886-
for key in kwargs:
887-
url += "&" + key + "=" + str(kwargs[key])
888-
# TODO logger.debug(url)
817+
"""Perform an API call at the OpenML server.
889818
return self._read_url(url, data=data, filePath=filePath,
890-
add_authentication=add_authentication)
891819
892820
def _read_url(self, url, add_authentication=False, data=None, filePath=None):
821+
822+
Parameters
823+
----------
824+
call : str
825+
The API call. For example data/list
826+
data : dict (default=None)
827+
Dictionary containing data which will be sent to the OpenML
828+
server via a POST request.
829+
**kwargs
830+
Further arguments which are appended as GET arguments.
831+
832+
Returns
833+
-------
834+
return_code : int
835+
HTTP return code
836+
return_value : str
837+
Return value of the OpenML server
838+
"""
839+
url = self.config.get("FAKE_SECTION", "server")
840+
if not url.endswith("/"):
841+
url += "/"
842+
url += call
843+
return self._read_url(url, data=data)
844+
845+
def _read_url(self, url, data=None):
893846
if data is None:
894847
data = {}
895-
if add_authentication:
848+
data['session_hash'] = self.config.get('FAKE_SECTION', 'apikey')
896849
data['session_hash'] = self._session_hash
897850

898851
if filePath is not None:

openml/entities/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pickle
1010

1111

12-
class Task(object):
12+
class OpenMLTask(object):
1313
def __init__(self, task_id, task_type, data_set_id, target_feature,
1414
estimation_procedure_type, data_splits_url,
1515
estimation_parameters, evaluation_measure,cost_matrix, api_connector):

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
numpy>=1.6.2
2+
scipy>=0.13.3
3+
numpydoc

setup.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111
version="0.0.1dev",
1212
packages=setuptools.find_packages(),
1313
package_data={'': ['*.txt', '*.md']},
14-
install_requires=["liac-arff>=2.1.0",
14+
install_requires=["liac-arff>=2.1.1dev",
1515
"numpy>=1.6.2",
1616
"scipy>=0.13.3",
1717
"xmltodict",
18-
"nose"],
18+
"nose",
19+
"numpydoc"],
1920
test_suite="nose.collector",
2021
classifiers=['Intended Audience :: Science/Research',
2122
'Intended Audience :: Developers',
@@ -31,4 +32,7 @@
3132
'Programming Language :: Python :: 3',
3233
'Programming Language :: Python :: 3.3',
3334
'Programming Language :: Python :: 3.4',
34-
])
35+
],
36+
dependency_links=[
37+
"http://github.com/mfeurer/liac-arff/archive/master.zip"
38+
"#egg=liac-arff-2.1.1dev"])

source/api.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,10 @@
55
APIs
66
****
77

8-
.. autoclass:: openml.apiconnector.APIConnector
8+
.. autoclass:: openml.apiconnector.APIConnector
9+
10+
.. autoclass:: openml.entities.dataset.OpenMLDataset
11+
12+
.. autoclass:: openml.entities.task.OpenMLTask
13+
14+
.. autoclass:: openml.entities.split.OpenMLSplit

source/conf.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,25 @@
1212
# All configuration values have a default; values that are commented out
1313
# serve to show the default.
1414

15-
import sys
1615
import os
16+
import sys
17+
18+
if sys.version_info[0] >= 3:
19+
from unittest.mock import MagicMock
20+
else:
21+
from mock import MagicMock
22+
23+
24+
class Mock(MagicMock):
25+
@classmethod
26+
def __getattr__(cls, name):
27+
return Mock()
28+
29+
30+
MOCK_MODULES = ['pygtk', 'gtk', 'gobject', 'argparse', 'numpy', 'pandas',
31+
'scipy', 'scipy.sparse', 'scipy.io', 'scipy.io.arff']
32+
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
33+
1734

1835
# If extensions (or modules to document with autodoc) are in another directory,
1936
# add these directories to sys.path here. If the directory is relative to the
@@ -33,10 +50,12 @@
3350
# ones.
3451
extensions = [
3552
'sphinx.ext.autodoc',
53+
'sphinx.ext.autosummary',
3654
'sphinx.ext.doctest',
3755
'sphinx.ext.coverage',
3856
'sphinx.ext.mathjax',
3957
'sphinx.ext.ifconfig',
58+
'numpydoc'
4059
]
4160

4261
# Add any paths that contain templates here, relative to this directory.

0 commit comments

Comments
 (0)