Skip to content

Commit 4d5d118

Browse files
authored
Merge pull request #372 from openml/improve_error_logging
Improve error logging
2 parents 32e98a6 + 200b56c commit 4d5d118

4 files changed

Lines changed: 39 additions & 5 deletions

File tree

openml/_api_calls.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def _read_url_files(url, data=None, file_dictionary=None, file_elements=None):
9595
# 'gzip,deflate'
9696
response = requests.post(url, data=data, files=file_elements)
9797
if response.status_code != 200:
98-
raise _parse_server_exception(response)
98+
raise _parse_server_exception(response, url=url)
9999
if 'Content-Encoding' not in response.headers or \
100100
response.headers['Content-Encoding'] != 'gzip':
101101
warnings.warn('Received uncompressed content from OpenML for %s.' % url)
@@ -117,14 +117,14 @@ def _read_url(url, data=None):
117117
response = requests.post(url, data=data)
118118

119119
if response.status_code != 200:
120-
raise _parse_server_exception(response)
120+
raise _parse_server_exception(response, url=url)
121121
if 'Content-Encoding' not in response.headers or \
122122
response.headers['Content-Encoding'] != 'gzip':
123123
warnings.warn('Received uncompressed content from OpenML for %s.' % url)
124124
return response.text
125125

126126

127-
def _parse_server_exception(response):
127+
def _parse_server_exception(response, url=None):
128128
# OpenML has a sophisticated error system
129129
# where information about failures is provided. try to parse this
130130
try:
@@ -143,4 +143,9 @@ def _parse_server_exception(response):
143143
# 512 for runs, 370 for datasets (should be 372), 500 for flows
144144
# 482 for tasks
145145
return OpenMLServerNoResult(code, message, additional)
146-
return OpenMLServerException(code, message, additional)
146+
return OpenMLServerException(
147+
code=code,
148+
message=message,
149+
additional=additional,
150+
url=url
151+
)

openml/datasets/functions.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict
2+
import hashlib
23
import io
34
import os
45
import re
@@ -365,6 +366,8 @@ def _get_dataset_arff(did_cache_dir, description):
365366
Location of arff file.
366367
"""
367368
output_file_path = os.path.join(did_cache_dir, "dataset.arff")
369+
md5_checksum_fixture = description.get("oml:md5_checksum")
370+
did = description.get("oml:id")
368371

369372
# This means the file is still there; whether it is useful is up to
370373
# the user and not checked by the program.
@@ -377,6 +380,14 @@ def _get_dataset_arff(did_cache_dir, description):
377380

378381
url = description['oml:url']
379382
arff_string = _read_url(url)
383+
md5 = hashlib.md5()
384+
md5.update(arff_string.encode('utf8'))
385+
md5_checksum = md5.hexdigest()
386+
if md5_checksum != md5_checksum_fixture:
387+
raise ValueError(
388+
'Checksum %s of downloaded dataset %d is unequal to the checksum '
389+
'%s sent by the server.' % (md5_checksum, did, md5_checksum_fixture)
390+
)
380391

381392
with io.open(output_file_path, "w", encoding='utf8') as fh:
382393
fh.write(arff_string)

openml/exceptions.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,13 @@ class OpenMLServerException(OpenMLServerError):
1616
"""exception for when the result of the server was
1717
not 200 (e.g., listing call w/o results). """
1818

19-
def __init__(self, code, message, additional=None):
19+
# Code needs to be optional to allow the exception to be picklable:
20+
# https://stackoverflow.com/questions/16244923/how-to-make-a-custom-exception-class-with-multiple-init-args-pickleable
21+
def __init__(self, message, code=None, additional=None, url=None):
22+
self.message = message
2023
self.code = code
2124
self.additional = additional
25+
self.url = url
2226
super(OpenMLServerException, self).__init__(message)
2327

2428

tests/test_datasets/test_dataset_functions.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,20 @@ def test__getarff_path_dataset_arff(self):
200200
self.assertIsInstance(arff_path, str)
201201
self.assertTrue(os.path.exists(arff_path))
202202

203+
def test__getarff_md5_issue(self):
204+
description = {
205+
'oml:id': 5,
206+
'oml:md5_checksum': 'abc',
207+
'oml:url': 'https://www.openml.org/data/download/61',
208+
}
209+
self.assertRaisesRegexp(
210+
ValueError,
211+
'Checksum ad484452702105cbf3d30f8deaba39a9 of downloaded dataset 5 '
212+
'is unequal to the checksum abc sent by the server.',
213+
_get_dataset_arff,
214+
self.workdir, description,
215+
)
216+
203217
def test__get_dataset_features(self):
204218
features = _get_dataset_features(self.workdir, 2)
205219
self.assertIsInstance(features, dict)

0 commit comments

Comments
 (0)