Skip to content

Commit 2b3ad49

Browse files
committed
Merge pull request #22 from openml/feature/utf8
UPDATE to work with openml issue #201
2 parents 73fa670 + 5639fba commit 2b3ad49

1 file changed

Lines changed: 2 additions & 5 deletions

File tree

openml/apiconnector.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,6 @@ def _get_cached_dataset_description(self, did):
258258
self._private_directory_datasets]:
259259
did_cache_dir = os.path.join(dataset_cache_dir, str(did))
260260
description_file = os.path.join(did_cache_dir, "description.xml")
261-
262261
try:
263262
with open(description_file) as fh:
264263
dataset_xml = fh.read()
@@ -882,19 +881,17 @@ def _read_url(self, url, data=None, file_dictionary=None):
882881
connection = urlopen(url, data=data)
883882
return_code = connection.getcode()
884883
content_type = connection.info()['Content-Type']
885-
# TODO maybe switch on the unicode flag!
886884
match = re.search(r'text/([\w-]*)(; charset=([\w-]*))?', content_type)
887885
if match:
888886
if match.groups()[2] is not None:
889887
encoding = match.group(3)
890888
else:
891-
encoding = "ascii"
889+
encoding = "utf8"
892890
else:
893891
# TODO ask JAN why this happens
894892
logger.warn("Data from %s has content type %s; going to treat "
895893
"this as ascii." % (url, content_type))
896-
encoding = "ascii"
897-
894+
encoding = "utf8"
898895
tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
899896
with tmp as fh:
900897
while True:

0 commit comments

Comments
 (0)