Skip to content

Commit 6a97b1c

Browse files
committed
Merge pull request #16 from openml/feature/travis-ci_secure-APIKEY
Feature/travis ci secure apikey
2 parents f83307f + 9712ddf commit 6a97b1c

7 files changed

Lines changed: 226 additions & 27 deletions

File tree

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ python:
44
- "2.7"
55
- "3.4"
66

7+
env:
8+
secure: "KTU56Bhft39FhFnV80Ek+Ht8nwAAJWlLAN104bALBzQWVraoD/znx0gQnoS+YQDjzxgpj30UKBua/o8q1IrvkjxJb8yUBzpS0P1jcGwqmpVRoNdb3pQPk8R7fB9pTFiaJUQbdQJ2/xTrB/T9Kda0J1zq81LC1zSOxAxUL47UI50="
9+
710
before_install:
811
- sudo apt-get install -q libatlas3gf-base libatlas-dev liblapack-dev gfortran
912

openml/apiconnector.py

Lines changed: 165 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from collections import OrderedDict
2-
import hashlib
32
import logging
43
import os
54
import re
@@ -25,8 +24,11 @@
2524
from .entities.dataset import OpenMLDataset
2625
from .entities.task import OpenMLTask
2726
from .entities.split import OpenMLSplit
27+
from .entities.run import OpenMLRun
2828
from .util import is_string
2929

30+
import numpy as np
31+
3032
logger = logging.getLogger(__name__)
3133

3234
OPENML_URL = "http://api_new.openml.org/v1/"
@@ -806,16 +808,162 @@ def _download_split(self, task, cache_file):
806808

807809
def _create_task_cache_dir(self, task_id):
808810
task_cache_dir = os.path.join(self.task_cache_dir, str(task_id))
811+
809812
try:
810813
os.makedirs(task_cache_dir)
811814
except (IOError, OSError):
812815
# TODO add debug information!
813816
pass
814817
return task_cache_dir
815818

816-
def _perform_api_call(self, call, data=None, filePath=None, add_authentication=True):
819+
############################################################################
820+
# Runs
821+
def get_runs_list(self, task_id=None, flow_id=None, setup_id=None):
822+
"""Return a list of all runs for either a task, flow or setup.
823+
824+
Exactly one of the optional parameters must be given.
825+
826+
Parameters
827+
----------
828+
task_id : int, optional
829+
flow_id : int, optional
830+
setup_id : int, optional
831+
832+
Returns
833+
-------
834+
list
835+
A list of run IDs for the given ID.
817836
"""
818-
Perform an API call at the OpenML server.
837+
test = [task_id is None, flow_id is None, setup_id is None]
838+
if np.nansum(test) != 2:
839+
raise ValueError
840+
841+
call = "run/list"
842+
843+
if task_id is not None:
844+
call += "?task_id=%d" % task_id
845+
elif flow_id is not None:
846+
call += "?implementation_id=%d" % flow_id
847+
elif setup_id is not None:
848+
call += "?setup_id=%d" % setup_id
849+
850+
return_code, xml_string = self._perform_api_call(call)
851+
datasets_dict = xmltodict.parse(xml_string)
852+
853+
854+
if isinstance(datasets_dict['oml:runs']['oml:run'], dict):
855+
runs = [datasets_dict['oml:runs']['oml:run']]
856+
else:
857+
# Minimalistic check if the XML is useful
858+
assert type(datasets_dict['oml:runs']['oml:run']) == list, \
859+
type(datasets_dict['oml:runs']['oml:run'])
860+
assert datasets_dict['oml:runs']['@xmlns:oml'] == \
861+
'http://openml.org/openml'
862+
863+
runs = []
864+
for runs_ in datasets_dict['oml:runs']['oml:run']:
865+
run = {'run_id': int(runs_['oml:run_id']),
866+
'task_id': int(runs_['oml:task_id']),
867+
'setup_id': int(runs_['oml:setup_id']),
868+
'implementation_id': int(runs_['oml:implementation_id']),
869+
'uploader': int(runs_['oml:uploader'])}
870+
871+
runs.append(run)
872+
runs.sort(key=lambda t: t['run_id'])
873+
874+
return runs
875+
876+
def download_run(self, run_id):
877+
"""Download the OpenML run for a given run ID.
878+
879+
Parameters
880+
----------
881+
run_id : int
882+
The OpenML run id.
883+
"""
884+
try:
885+
run_id = int(run_id)
886+
except:
887+
raise ValueError("Task ID is neither an Integer nor can be "
888+
"cast to an Integer.")
889+
890+
xml_file = os.path.join(self._create_run_cache_dir(run_id),
891+
"run.xml")
892+
893+
try:
894+
with open(xml_file) as fh:
895+
run = self._create_run_from_xml(fh.read())
896+
except (OSError, IOError):
897+
898+
try:
899+
return_code, run_xml = self._perform_api_call(
900+
"run/%d" % run_id)
901+
except (URLError, UnicodeEncodeError) as e:
902+
print(e)
903+
raise e
904+
905+
# Cache the xml task file
906+
if os.path.exists(xml_file):
907+
with open(xml_file) as fh:
908+
local_xml = fh.read()
909+
910+
if run_xml != local_xml:
911+
raise ValueError("Run description of run %d cached at %s "
912+
"has changed." % (run_id, xml_file))
913+
914+
else:
915+
with open(xml_file, "w") as fh:
916+
fh.write(run_xml)
917+
918+
run = self._create_run_from_xml(run_xml)
919+
920+
return run
921+
922+
def _create_run_cache_dir(self, run_id):
923+
run_cache_dir = os.path.join(self.task_cache_dir, str(run_id))
924+
925+
try:
926+
os.makedirs(run_cache_dir)
927+
except (IOError, OSError):
928+
# TODO add debug information!
929+
pass
930+
return run_cache_dir
931+
932+
def _create_run_from_xml(self, xml):
933+
dic = xmltodict.parse(xml)[u"oml:run"]
934+
datasets = []
935+
for key in dic[u'oml:input_data']:
936+
dataset = dic[u'oml:input_data'][key]
937+
did = dataset[u'oml:did']
938+
datasets.append(did)
939+
940+
tags = []
941+
for tag in dic[u"oml:tag"]:
942+
tags.append(tag)
943+
944+
files = dict()
945+
for file_ in dic[u"oml:output_data"][u"oml:file"]:
946+
name = file_[u"oml:name"]
947+
url = file_[u"oml:url"]
948+
files[name] = url
949+
950+
evaluations = dict()
951+
for evaluation in dic[u"oml:output_data"][u"oml:evaluation"]:
952+
name = evaluation[u"oml:name"]
953+
value = evaluation.get(u"oml:value")
954+
value_array = evaluation.get(u"oml:array_data")
955+
evaluations[name] = (value, value_array)
956+
957+
return OpenMLRun(
958+
dic[u"oml:run_id"], dic[u"oml:uploader"],
959+
dic[u"oml:task_id"], dic[u"oml:implementation_id"],
960+
dic[u"oml:setup_string"], dic[u'oml:setup_id'],
961+
tags, datasets, files, evaluations)
962+
963+
############################################################################
964+
# Internal stuff
965+
def _perform_api_call(self, call, data=None, file_path=None):
966+
"""Perform an API call at the OpenML server.
819967
return self._read_url(url, data=data, filePath=filePath,
820968
def _read_url(self, url, add_authentication=False, data=None, filePath=None):
821969
@@ -840,32 +988,33 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
840988
if not url.endswith("/"):
841989
url += "/"
842990
url += call
843-
return self._read_url(url, data=data,filePath= filePath)
991+
return self._read_url(url, data=data, file_path=file_path)
844992

845-
def _read_url(self, url, data=None, filePath=None):
993+
def _read_url(self, url, data=None, file_path=None):
846994
if data is None:
847995
data = {}
848-
data['session_hash'] = self.config.get('FAKE_SECTION', 'apikey')
996+
data['api_key'] = self.config.get('FAKE_SECTION', 'apikey')
849997

850-
if filePath is not None:
851-
if os.path.isabs(filePath):
998+
if file_path is not None:
999+
if os.path.isabs(file_path):
8521000
try:
8531001
decoder = arff.ArffDecoder()
8541002
except:
8551003
raise "The file you provided is not a valid arff file"
8561004

857-
fileElement={'dataset': open(filePath, 'rb')}
1005+
fileElement={'dataset': open(file_path, 'rb')}
8581006
data['description']= data.get('description')
8591007
data.pop('dataset', None)
8601008

8611009
try:
8621010
response = requests.post(url, data=data, files=fileElement)
863-
except URLError, error:
864-
print error
1011+
except URLError as error:
1012+
print(error)
8651013

8661014
return response.status_code, response
8671015
else:
8681016
raise "File doesn't exists"
1017+
8691018
else:
8701019
data = urlencode(data)
8711020
data = data.encode('utf-8')
@@ -907,10 +1056,11 @@ def _read_url(self, url, data=None, filePath=None):
9071056
string.write(chunk)
9081057
return return_code, string.getvalue()
9091058

910-
def upload_dataset(self, description, filePath=None):
1059+
def upload_dataset(self, description, file_path=None):
9111060
try:
9121061
data = {'description': description}
913-
return_code, dataset_xml = self._perform_api_call("/data/",data=data, filePath=filePath)
1062+
return_code, dataset_xml = self._perform_api_call(
1063+
"/data/", data=data, file_path=file_path)
9141064

9151065
except URLError as e:
9161066
# TODO logger.debug
@@ -921,7 +1071,8 @@ def upload_dataset(self, description, filePath=None):
9211071
def upload_flow(self, description, binary, source):
9221072
try:
9231073
data = {'description': description, 'binary': binary, 'source': source}
924-
return_code, dataset_xml = self._perform_api_call("openml.implementation.upload", data=data)
1074+
return_code, dataset_xml = self._perform_api_call(
1075+
"openml.implementation.upload", data=data)
9251076

9261077
except URLError as e:
9271078
# TODO logger.debug

openml/entities/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def __init__(self, id, name, version, description, format, creator,
8181
else:
8282
raise Exception()
8383

84-
with open(self.data_pickle_file, "w") as fh:
84+
with open(self.data_pickle_file, "wb") as fh:
8585
pickle.dump((X, categorical, attribute_names), fh, -1)
8686
logger.debug("Saved dataset %d: %s to file %s" %
8787
(self.id, self.name, self.data_pickle_file))

openml/entities/run.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
class OpenMLRun(object):
2+
def __init__(self, run_id, uploader, task_id, flow_id, setup_string,
3+
setup_id, tags, datasets, files, evaluations):
4+
self.run_id = run_id
5+
self.uploader = uploader
6+
self.task_id = task_id
7+
self.flow_id = flow_id
8+
self.setup_id = setup_id
9+
self.setup_string = setup_string
10+
self.tags = tags
11+
self.datasets = datasets
12+
self.files = files
13+
self.evaluations = evaluations

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
"scipy>=0.13.3",
1717
"xmltodict",
1818
"nose",
19-
"numpydoc"],
19+
"numpydoc",
20+
"requests"],
2021
test_suite="nose.collector",
2122
classifiers=['Intended Audience :: Science/Research',
2223
'Intended Audience :: Developers',

source/progress.rst

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ API calls
1313
API call implemented tested properly tested loads json proper error handling
1414
=============================================== =========== ====== =============== ========== =====================
1515
/data/list/ yes yes
16-
/data/list/active/
1716
/data/list/tag/{tag}
1817
/data/{data_id} yes yes
1918
/data/delete/
@@ -26,7 +25,6 @@ API call implemented tested properly test
2625
/data/tag
2726
/data/untag
2827
/task/list yes yes
29-
/task/list/active
3028
/task/list/tag/{tag}
3129
/task/{task_id} yes yes
3230
/task/tag
@@ -40,8 +38,8 @@ API call implemented tested properly test
4038
/flow/
4139
/flow/exists/{name,ext_version}
4240
/flow/owned
43-
/run/list
44-
/run/{run_id}
41+
/run/list yes yes
42+
/run/{run_id} yes yes
4543
/run
4644
/run/tag
4745
/run/untag

tests/test_apiconnector.py

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ class TestAPIConnector(unittest.TestCase):
2727
"""
2828

2929
def setUp(self):
30-
config_file = os.path.expanduser('~/.openml/config')
31-
if not os.path.exists(config_file):
32-
raise Exception("OpenML config file required to run unit tests. "
33-
"See https://github.com/openml/OpenML/wiki/Client-API")
34-
3530
self.cwd = os.getcwd()
3631
workdir = os.path.dirname(os.path.abspath(__file__))
3732
self.workdir = os.path.join(workdir, "tmp")
@@ -44,8 +39,13 @@ def setUp(self):
4439
os.chdir(self.workdir)
4540

4641
self.cached = True
42+
43+
try:
44+
apikey = os.environ['OPENMLAPIKEY']
45+
except:
46+
apikey = None
4747
self.connector = APIConnector(cache_directory=self.workdir,
48-
apikey='test')
48+
apikey=apikey)
4949

5050
def tearDown(self):
5151
os.chdir(self.cwd)
@@ -66,7 +66,7 @@ def test_get_cached_datasets(self):
6666
datasets = connector.get_cached_datasets()
6767
self.assertIsInstance(datasets, dict)
6868
self.assertEqual(len(datasets), 2)
69-
self.assertIsInstance(datasets.values()[0], OpenMLDataset)
69+
self.assertIsInstance(list(datasets.values())[0], OpenMLDataset)
7070

7171
def test_get_cached_dataset(self):
7272
workdir = os.path.dirname(os.path.abspath(__file__))
@@ -221,6 +221,39 @@ def test_download_split(self):
221221
self.assertTrue(os.path.exists(
222222
os.path.join(os.getcwd(), "tasks", "1", "datasplits.arff")))
223223

224+
############################################################################
225+
# Runs
226+
def test_download_run_list(self):
227+
def check_run(run):
228+
self.assertIsInstance(run, dict)
229+
self.assertEqual(len(run), 5)
230+
231+
runs = self.connector.get_runs_list(task_id=1)
232+
# 1759 is the number of supervised classification runs retrieved from
233+
# openml.org from this call; don't trust the number on openml.org as
234+
# it also counts private datasets
235+
self.assertGreaterEqual(len(runs), 800)
236+
for run in runs:
237+
check_run(run)
238+
239+
runs = self.connector.get_runs_list(flow_id=1)
240+
self.assertGreaterEqual(len(runs), 1)
241+
for task in runs:
242+
check_run(task)
243+
244+
runs = self.connector.get_runs_list(setup_id=1)
245+
self.assertGreaterEqual(len(runs), 261)
246+
for task in runs:
247+
check_run(task)
248+
249+
def test_download_run(self):
250+
run = self.connector.download_run(473350)
251+
self.assertGreaterEqual(len(run.tags), 2)
252+
self.assertEqual(len(run.datasets), 1)
253+
self.assertGreaterEqual(len(run.files), 2)
254+
self.assertGreaterEqual(len(run.evaluations), 18)
255+
self.assertEqual(len(run.evaluations['f_measure']), 2)
256+
224257
def test_upload_dataset(self):
225258

226259
dataset = self.connector.download_dataset(3)

0 commit comments

Comments
 (0)