Skip to content

Commit 7026f81

Browse files
committed
Merge branch 'release/0.1.0'
2 parents 6a97b1c + 7825b09 commit 7026f81

3 files changed

Lines changed: 98 additions & 206 deletions

File tree

openml/apiconnector.py

Lines changed: 44 additions & 183 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict
2+
import hashlib
23
import logging
34
import os
45
import re
@@ -816,154 +817,9 @@ def _create_task_cache_dir(self, task_id):
816817
pass
817818
return task_cache_dir
818819

819-
############################################################################
820-
# Runs
821-
def get_runs_list(self, task_id=None, flow_id=None, setup_id=None):
822-
"""Return a list of all runs for either a task, flow or setup.
823-
824-
Exactly one of the optional parameters must be given.
825-
826-
Parameters
827-
----------
828-
task_id : int, optional
829-
flow_id : int, optional
830-
setup_id : int, optional
831-
832-
Returns
833-
-------
834-
list
835-
A list of all runs run IDs for a given ID.
820+
def _perform_api_call(self, call, data=None, file_dictionary=None, add_authentication=True):
836821
"""
837-
test = [task_id is None, flow_id is None, setup_id is None]
838-
if np.nansum(test) != 2:
839-
raise ValueError
840-
841-
call = "run/list"
842-
843-
if task_id is not None:
844-
call += "?task_id=%d" % task_id
845-
elif flow_id is not None:
846-
call += "?implementation_id=%d" % flow_id
847-
elif setup_id is not None:
848-
call += "?setup_id=%d" % setup_id
849-
850-
return_code, xml_string = self._perform_api_call(call)
851-
datasets_dict = xmltodict.parse(xml_string)
852-
853-
854-
if isinstance(datasets_dict['oml:runs']['oml:run'], dict):
855-
runs = [datasets_dict['oml:runs']['oml:run']]
856-
else:
857-
# Minimalistic check if the XML is useful
858-
assert type(datasets_dict['oml:runs']['oml:run']) == list, \
859-
type(datasets_dict['oml:runs']['oml:run'])
860-
assert datasets_dict['oml:runs']['@xmlns:oml'] == \
861-
'http://openml.org/openml'
862-
863-
runs = []
864-
for runs_ in datasets_dict['oml:runs']['oml:run']:
865-
run = {'run_id': int(runs_['oml:run_id']),
866-
'task_id': int(runs_['oml:task_id']),
867-
'setup_id': int(runs_['oml:setup_id']),
868-
'implementation_id': int(runs_['oml:implementation_id']),
869-
'uploader': int(runs_['oml:uploader'])}
870-
871-
runs.append(run)
872-
runs.sort(key=lambda t: t['run_id'])
873-
874-
return runs
875-
876-
def download_run(self, run_id):
    """Download the OpenML run for a given run ID.

    The run description is read from ``run.xml`` inside the run's cache
    directory when that file can be opened; otherwise it is fetched with
    the ``run/<run_id>`` API call and written to that file.

    Parameters
    ----------
    run_id : int
        The OpenML run id. Anything ``int()`` accepts is allowed.

    Returns
    -------
    object
        Whatever ``self._create_run_from_xml`` builds from the run XML.

    Raises
    ------
    ValueError
        If ``run_id`` cannot be cast to an integer, or if a cached
        description exists but differs from the downloaded one.
    """
    try:
        run_id = int(run_id)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are translated; anything else (e.g.
        # KeyboardInterrupt) must not be masked as a ValueError.
        raise ValueError("Run ID is neither an Integer nor can be "
                         "cast to an Integer.")

    xml_file = os.path.join(self._create_run_cache_dir(run_id),
                            "run.xml")

    # Keep the try body minimal: only a failure to *read* the cache should
    # trigger a download, not an error raised while parsing the XML.
    try:
        with open(xml_file) as fh:
            cached_xml = fh.read()
    except (OSError, IOError):
        cached_xml = None

    if cached_xml is not None:
        return self._create_run_from_xml(cached_xml)

    try:
        return_code, run_xml = self._perform_api_call("run/%d" % run_id)
    except (URLError, UnicodeEncodeError) as e:
        print(e)
        # Bare raise keeps the original traceback.
        raise

    # Cache the xml run file
    if os.path.exists(xml_file):
        # The file could not be read above but exists now; refuse to
        # silently overwrite a description that differs.
        with open(xml_file) as fh:
            local_xml = fh.read()

        if run_xml != local_xml:
            raise ValueError("Run description of run %d cached at %s "
                             "has changed." % (run_id, xml_file))
    else:
        with open(xml_file, "w") as fh:
            fh.write(run_xml)

    return self._create_run_from_xml(run_xml)
921-
922-
def _create_run_cache_dir(self, run_id):
923-
run_cache_dir = os.path.join(self.task_cache_dir, str(run_id))
924-
925-
try:
926-
os.makedirs(run_cache_dir)
927-
except (IOError, OSError):
928-
# TODO add debug information!
929-
pass
930-
return run_cache_dir
931-
932-
def _create_run_from_xml(self, xml):
    """Build an ``OpenMLRun`` from the XML description of a run.

    Parameters
    ----------
    xml : str
        XML document whose root element is ``oml:run``.

    Returns
    -------
    OpenMLRun
        Constructed from the run id, uploader, task id, implementation id,
        setup string, setup id, tags, dataset ids, output files
        (name -> url) and evaluations (name -> (value, array_data)).
    """
    def as_list(value):
        # The parsed XML holds a bare value for a single child element and
        # a list only for repeated ones; normalise both (and a missing
        # element) to a list so the loops below never iterate over the
        # characters of a string or the keys of a dict.
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    dic = xmltodict.parse(xml)[u"oml:run"]

    input_data = dic[u'oml:input_data']
    datasets = [dataset[u'oml:did']
                for key in input_data
                for dataset in as_list(input_data[key])]

    tags = list(as_list(dic.get(u"oml:tag")))

    output_data = dic[u"oml:output_data"]

    files = dict()
    for file_ in as_list(output_data.get(u"oml:file")):
        files[file_[u"oml:name"]] = file_[u"oml:url"]

    evaluations = dict()
    for evaluation in as_list(output_data.get(u"oml:evaluation")):
        # Either a scalar value or array data may be absent.
        evaluations[evaluation[u"oml:name"]] = (
            evaluation.get(u"oml:value"),
            evaluation.get(u"oml:array_data"))

    return OpenMLRun(
        dic[u"oml:run_id"], dic[u"oml:uploader"],
        dic[u"oml:task_id"], dic[u"oml:implementation_id"],
        dic[u"oml:setup_string"], dic[u'oml:setup_id'],
        tags, datasets, files, evaluations)
962-
963-
############################################################################
964-
# Internal stuff
965-
def _perform_api_call(self, call, data=None, file_path=None):
966-
"""Perform an API call at the OpenML server.
822+
Perform an API call at the OpenML server.
967823
return self._read_url(url, data=data, filePath=filePath,
968824
def _read_url(self, url, add_authentication=False, data=None, filePath=None):
969825
@@ -988,33 +844,35 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
988844
if not url.endswith("/"):
989845
url += "/"
990846
url += call
991-
return self._read_url(url, data=data, file_path=file_path)
847+
return self._read_url(url, data=data, file_dictionary=file_dictionary)
992848

993-
def _read_url(self, url, data=None, file_path=None):
849+
def _read_url(self, url, data=None, file_dictionary=None):
994850
if data is None:
995851
data = {}
996852
data['api_key'] = self.config.get('FAKE_SECTION', 'apikey')
997853

998-
if file_path is not None:
999-
if os.path.isabs(file_path):
1000-
try:
1001-
decoder = arff.ArffDecoder()
1002-
except:
1003-
raise "The file you provided is not a valid arff file"
1004-
1005-
fileElement={'dataset': open(file_path, 'rb')}
1006-
data['description']= data.get('description')
1007-
data.pop('dataset', None)
854+
if file_dictionary is not None:
855+
file_elements = {}
856+
for key, path in file_dictionary.items():
857+
if os.path.isabs(path) and os.path.exists(path):
858+
try:
859+
if key is 'dataset':
860+
decoder = arff.ArffDecoder()
861+
with open(path) as fh:
862+
decoder.decode(fh, encode_nominal=True)
863+
except:
864+
raise ValueError("The file you have provided is not a valid arff file")
1008865

1009-
try:
1010-
response = requests.post(url, data=data, files=fileElement)
1011-
except URLError as error:
1012-
print(error)
866+
file_elements[key] = open(path, 'rb')
1013867

868+
else:
869+
raise ValueError("File doesn't exist")
870+
try:
871+
response = requests.post(url, data=data, files=file_elements)
1014872
return response.status_code, response
1015-
else:
1016-
raise "File doesn't exists"
1017873

874+
except URLError as error:
875+
print(error)
1018876
else:
1019877
data = urlencode(data)
1020878
data = data.encode('utf-8')
@@ -1059,38 +917,41 @@ def _read_url(self, url, data=None, file_path=None):
1059917
def upload_dataset(self, description, file_path=None):
    """Upload a dataset description, optionally with a dataset file.

    Parameters
    ----------
    description : str
        Sent to the server as the ``description`` form field.
    file_path : str, optional
        Path of the dataset file; passed on under the ``dataset`` key of
        ``file_dictionary``. Omitted from the call when ``None``.

    Returns
    -------
    tuple
        ``(return_code, dataset_xml)`` as returned by
        ``self._perform_api_call``.

    Raises
    ------
    URLError
        Re-raised after being printed.
    """
    call_kwargs = {'data': {'description': description}}
    if file_path is not None:
        call_kwargs['file_dictionary'] = {'dataset': file_path}

    try:
        return_code, dataset_xml = self._perform_api_call(
            "/data/", **call_kwargs)
    except URLError as e:
        # TODO logger.debug
        print(e)
        # Bare raise keeps the original traceback.
        raise
    return return_code, dataset_xml
1070930

1071-
def upload_flow(self, description, file_path=None):
    """Upload a flow description, optionally with a source file.

    Parameters
    ----------
    description : str
        Sent to the server as the ``description`` form field.
    file_path : str, optional
        Path of the flow's source file; passed on under the ``source``
        key of ``file_dictionary``. Omitted from the call when ``None``.

    Returns
    -------
    tuple
        ``(return_code, flow_xml)`` as returned by
        ``self._perform_api_call``.

    Raises
    ------
    URLError
        Re-raised after being printed.
    """
    call_kwargs = {'data': {'description': description}}
    # Only attach a file when one was given: _read_url runs
    # os.path.isabs() on every path in file_dictionary, which fails on
    # None, so the default argument used to crash before any request.
    if file_path is not None:
        call_kwargs['file_dictionary'] = {'source': file_path}

    try:
        return_code, flow_xml = self._perform_api_call(
            "/flow/", **call_kwargs)
    except URLError as e:
        # TODO logger.debug
        print(e)
        # Bare raise keeps the original traceback.
        raise
    return return_code, flow_xml
1082941

1083-
def upload_run(self, files):
    """Upload the files of a run.

    Parameters
    ----------
    files : dict
        Maps upload field names to file paths. Must contain the key
        ``'predictions'``.

    Returns
    -------
    tuple
        ``(return_code, run_xml)`` as returned by
        ``self._perform_api_call``.

    Raises
    ------
    ValueError
        If ``files`` has no ``'predictions'`` entry.
    URLError
        Re-raised after being printed.
    """
    # Guard clause: validate before doing any work.
    if 'predictions' not in files:
        raise ValueError("prediction files doesn't exist")

    # Hand over a copy so the caller's dict is never shared with the
    # request machinery.
    file_dictionary = dict(files)

    try:
        return_code, run_xml = self._perform_api_call(
            "/run/", file_dictionary=file_dictionary)
    except URLError as e:
        # TODO logger.debug
        print(e)
        # Bare raise keeps the original traceback.
        raise
    return return_code, run_xml

source/progress.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ API call implemented tested properly test
1616
/data/list/tag/{tag}
1717
/data/{data_id} yes yes
1818
/data/delete/
19-
/data/upload/
19+
/data/upload/ yes yes
2020
/data/features/{data_id} yes yes
2121
/data/features/upload/
2222
/data/qualities/{data_id} yes yes
@@ -32,15 +32,16 @@ API call implemented tested properly test
3232
/task/delete
3333
/tasktype/list
3434
/tasktype/{task_id}
35+
/flow/list yes
3536
/flow/tag
3637
/flow/untag
3738
/flow/{flow_id}
38-
/flow/
39+
/flow/ yes yes
3940
/flow/exists/{name,ext_version}
4041
/flow/owned
4142
/run/list yes yes
4243
/run/{run_id} yes yes
43-
/run
44+
/run yes yes
4445
/run/tag
4546
/run/untag
4647
/run/evaluate

0 commit comments

Comments
 (0)