Skip to content

Commit 2bfe552

Browse files
committed
Fixed the dataset upload so it works now.
1 parent 0453b34 commit 2bfe552

2 files changed

Lines changed: 96 additions & 113 deletions

File tree

openml/apiconnector.py

Lines changed: 67 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import re
66
import sys
77
import tempfile
8+
import requests
9+
import arff
810

911
if sys.version_info[0] < 3:
1012
import ConfigParser as configparser
@@ -875,7 +877,7 @@ def _create_task_cache_dir(self, task_id):
875877
pass
876878
return task_cache_dir
877879

878-
def _perform_api_call(self, call, data=None, add_authentication=True,
880+
def _perform_api_call(self, call, data=None, filePath=None, add_authentication=True,
879881
**kwargs):
880882
# TODO: do input validation!
881883
url = self.config.get("FAKE_SECTION", "server") + "/api/?f="
@@ -884,95 +886,90 @@ def _perform_api_call(self, call, data=None, add_authentication=True,
884886
for key in kwargs:
885887
url += "&" + key + "=" + str(kwargs[key])
886888
# TODO logger.debug(url)
887-
return self._read_url(url, data=data,
889+
return self._read_url(url, data=data, filePath=filePath,
888890
add_authentication=add_authentication)
889891

890-
def _read_url(self, url, add_authentication=False, data=None):
892+
def _read_url(self, url, add_authentication=False, data=None, filePath=None):
891893
if data is None:
892894
data = {}
893895
if add_authentication:
894896
data['session_hash'] = self._session_hash
895-
data = urlencode(data)
896-
data = data.encode('utf-8')
897-
898-
CHUNK = 16 * 1024
899-
900-
string = StringIO()
901-
connection = urlopen(url, data=data)
902-
return_code = connection.getcode()
903-
content_type = connection.info()['Content-Type']
904-
# TODO maybe switch on the unicode flag!
905-
match = re.search(r'text/([\w-]*)(; charset=([\w-]*))?', content_type)
906-
if match:
907-
if match.groups()[2] is not None:
908-
encoding = match.group(3)
909-
else:
910-
encoding = "ascii"
911-
else:
912-
# TODO ask JAN why this happens
913-
logger.warn("Data from %s has content type %s; going to treat "
914-
"this as ascii." % (url, content_type))
915-
encoding = "ascii"
916-
917-
tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
918-
with tmp as fh:
919-
while True:
920-
chunk = connection.read(CHUNK)
921-
# Chunk is now a proper string (UTF-8 in python)
922-
chunk = chunk.decode(encoding)
923-
if not chunk:
924-
break
925-
fh.write(chunk)
926-
927-
tmp = open(tmp.name, "r")
928-
with tmp as fh:
929-
while True:
930-
chunk = fh.read(CHUNK)
931-
if not chunk:
932-
break
933-
string.write(chunk)
934-
935-
return return_code, string.getvalue()
936-
937-
def upload_dataset(self, description, dataset=None):
938-
try:
939-
data={}
940-
if dataset is None:
941-
data = {'description': description}
942-
else:
943-
data = {'dataset': dataset, 'description': description}
944897

945-
return_code, dataset_xml = self._perform_api_call("openml.data.upload",data=data)
898+
if filePath is not None:
899+
if os.path.isabs(filePath):
900+
try:
901+
decoder = arff.ArffDecoder()
902+
except:
903+
raise "The file you provided is not a valid arff file"
946904

947-
except URLError as e:
948-
# TODO logger.debug
949-
print(e)
950-
raise e
951-
return return_code, dataset_xml
905+
fileElement={'dataset': open(filePath, 'rb')}
906+
data['description']= data.get('description')
907+
data.pop('dataset', None)
952908

953-
def upload_dataset_features(self, description):
954-
try:
955-
data = {'description': description}
956-
return_code, dataset_xml = self._perform_api_call("openml.data.features.upload", data=data)
909+
try:
910+
response = requests.post(url, data=data, files=fileElement)
911+
except URLError, error:
912+
print error
957913

958-
except URLError as e:
959-
# TODO logger.debug
960-
print(e)
961-
raise e
962-
return return_code, dataset_xml
914+
return response.status_code, response
915+
else:
916+
raise "File doesn't exists"
917+
918+
919+
else:
920+
data = urlencode(data)
921+
data = data.encode('utf-8')
922+
923+
CHUNK = 16 * 1024
924+
string = StringIO()
925+
connection = urlopen(url, data=data)
926+
return_code = connection.getcode()
927+
content_type = connection.info()['Content-Type']
928+
# TODO maybe switch on the unicode flag!
929+
match = re.search(r'text/([\w-]*)(; charset=([\w-]*))?', content_type)
930+
if match:
931+
if match.groups()[2] is not None:
932+
encoding = match.group(3)
933+
else:
934+
encoding = "ascii"
935+
else:
936+
# TODO ask JAN why this happens
937+
logger.warn("Data from %s has content type %s; going to treat "
938+
"this as ascii." % (url, content_type))
939+
encoding = "ascii"
963940

964-
def upload_dataset_qualities(self, description):
941+
tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
942+
with tmp as fh:
943+
while True:
944+
chunk = connection.read(CHUNK)
945+
# Chunk is now a proper string (UTF-8 in python)
946+
chunk = chunk.decode(encoding)
947+
if not chunk:
948+
break
949+
fh.write(chunk)
950+
951+
tmp = open(tmp.name, "r")
952+
with tmp as fh:
953+
while True:
954+
chunk = fh.read(CHUNK)
955+
if not chunk:
956+
break
957+
string.write(chunk)
958+
959+
return return_code, string.getvalue()
960+
def upload_dataset(self, description, filePath=None):
    """Upload a dataset description, optionally with a data file.

    Calls the ``openml.data.upload`` API function through
    ``self._perform_api_call``.

    Parameters
    ----------
    description : str
        Dataset description sent as the ``description`` form field.
    filePath : str or None
        Path of the data file to upload; passed through unchanged to
        ``_perform_api_call``.

    Returns
    -------
    tuple
        The ``(return_code, dataset_xml)`` pair produced by
        ``_perform_api_call``.

    Raises
    ------
    URLError
        Re-raised after being printed.
    """
    data = {'description': description}
    try:
        return_code, dataset_xml = self._perform_api_call(
            "openml.data.upload", data=data, filePath=filePath)
    except URLError as e:
        # TODO logger.debug
        print(e)
        # A bare ``raise`` keeps the original traceback; ``raise e``
        # replaces it on Python 2.
        raise
    return return_code, dataset_xml
974971

975-
def upload_implementation(self, description, binary, source):
972+
def upload_flow(self, description, binary, source):
976973
try:
977974
data = {'description': description, 'binary': binary, 'source': source}
978975
return_code, dataset_xml = self._perform_api_call("openml.implementation.upload", data=data)
@@ -997,14 +994,3 @@ def upload_run(self, description, files):
997994
raise e
998995
return return_code, dataset_xml
999996

1000-
def upload_file(self, file):
1001-
try:
1002-
data ={'file': file}
1003-
return_code, dataset_xml = self._perform_api_call("openml.file.upload", data=data)
1004-
1005-
except URLError as e:
1006-
# TODO logger.debug
1007-
print(e)
1008-
raise e
1009-
return return_code, dataset_xml
1010-

tests/test_apiconnector.py

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -241,53 +241,50 @@ def test_download_split(self):
241241

242242
def test_upload_dataset(self):
    # Uploads the cached ARFF file of dataset 3 together with a description
    # and expects status code 200.

    # The return value is not needed; the download presumably populates
    # dataset_cache_dir/3/dataset.arff -- TODO confirm.
    self.connector.download_dataset(3)
    filePath = os.path.join(self.connector.dataset_cache_dir, "3", "dataset.arff")

    description = """ <oml:data_set_description xmlns:oml="http://openml.org/openml">
<oml:name>anneal</oml:name>
<oml:version>1</oml:version>
<oml:description>test</oml:description>
<oml:format>ARFF</oml:format>
<oml:licence>Public</oml:licence>
<oml:default_target_attribute>class</oml:default_target_attribute>
<oml:md5_checksum></oml:md5_checksum>
</oml:data_set_description>
"""
    return_code, dataset_xml = self.connector.upload_dataset(description, filePath)
    self.assertEqual(return_code, 200)
274259

275-
def test_upload_dataset_features(self):
276-
raise Exception()
def test_upload_dataset_with_url(self):
    # Uploads a description that points at the data through <oml:url>
    # instead of sending a file, and expects status code 200.
    xml_description = """ <oml:data_set_description xmlns:oml="http://openml.org/openml">
<oml:name>UploadTestWithURL</oml:name>
<oml:version>1</oml:version>
<oml:description>test</oml:description>
<oml:format>ARFF</oml:format>
<oml:url>http://expdb.cs.kuleuven.be/expdb/data/uci/nominal/iris.arff</oml:url>
</oml:data_set_description>
"""
    upload_result = self.connector.upload_dataset(xml_description)
    status, _reply = upload_result
    self.assertEqual(status, 200)
290272

def test_upload_flow(self):
    # NOTE(review): despite its name, this test sends a dataset description
    # through upload_dataset and never calls upload_flow -- confirm whether
    # a flow description and an upload_flow call were intended here.
    xml_description = """ <oml:data_set_description xmlns:oml="http://openml.org/openml">
<oml:name>UploadTestWithURL</oml:name>
<oml:version>1</oml:version>
<oml:description>test</oml:description>
<oml:format>ARFF</oml:format>
<oml:url>http://expdb.cs.kuleuven.be/expdb/data/uci/nominal/iris.arff</oml:url>
</oml:data_set_description>
"""
    upload_result = self.connector.upload_dataset(xml_description)
    status, _reply = upload_result
    self.assertEqual(status, 200)
285+
286+
287+
291288

292289

293290

0 commit comments

Comments
 (0)