Skip to content

Commit deb769e

Browse files
ArlindKadra authored and mfeurer committed
Fix #378 (#418)
* Fix ASCII decoding problem with Python 2
* Created separate pickle files for splits and datasets according to the Python version
* Added production task to unit test
* Update test_task_functions.py
1 parent f4387d6 commit deb769e

4 files changed

Lines changed: 25 additions & 6 deletions

File tree

openml/datasets/data_feature.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import six
12

23
class OpenMLDataFeature(object):
34
"""Data Feature (a.k.a. Attribute) object.
@@ -29,7 +30,11 @@ def __init__(self, index, name, data_type, nominal_values,
2930
raise ValueError('number_missing_values is of wrong datatype')
3031

3132
self.index = index
32-
self.name = str(name)
33+
# In case of python version lower than 3, change the default ASCII encoder.
34+
if six.PY2:
35+
self.name = str(name.encode('utf8'))
36+
else:
37+
self.name = str(name)
3338
self.data_type = str(data_type)
3439
self.nominal_values = nominal_values
3540
self.number_missing_values = number_missing_values

openml/datasets/dataset.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,10 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
9191

9292
if data_file is not None:
9393
if self._data_features_supported():
94-
self.data_pickle_file = data_file.replace('.arff', '.pkl')
94+
if six.PY2:
95+
self.data_pickle_file = data_file.replace('.arff', '.pkl.py2')
96+
else:
97+
self.data_pickle_file = data_file.replace('.arff', '.pkl.py3')
9598

9699
if os.path.exists(self.data_pickle_file):
97100
logger.debug("Data pickle file already exists.")

openml/tasks/split.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections import namedtuple, OrderedDict
22
import os
3-
import sys
3+
import six
44

55
import numpy as np
66
import scipy.io.arff
@@ -60,11 +60,18 @@ def __eq__(self, other):
6060
@classmethod
6161
def _from_arff_file(cls, filename, cache=True):
6262
repetitions = None
63-
pkl_filename = filename.replace(".arff", ".pkl")
63+
if six.PY2:
64+
pkl_filename = filename.replace(".arff", ".pkl.py2")
65+
else:
66+
pkl_filename = filename.replace(".arff", ".pkl.py3")
6467
if cache:
6568
if os.path.exists(pkl_filename):
66-
with open(pkl_filename, "rb") as fh:
67-
_ = pickle.load(fh)
69+
try:
70+
with open(pkl_filename, "rb") as fh:
71+
_ = pickle.load(fh)
72+
except UnicodeDecodeError as e:
73+
# Possibly pickle file was created with python2 and python3 is being used to load the data
74+
raise e
6875
repetitions = _["repetitions"]
6976
name = _["name"]
7077

tests/test_tasks/test_task_functions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ def test_list_tasks_per_type_paginate(self):
111111
def test__get_task(self):
112112
openml.config.set_cache_directory(self.static_cache_dir)
113113
task = openml.tasks.get_task(1882)
114+
# Test the following task as it used to throw an Unicode Error.
115+
# https://github.com/openml/openml-python/issues/378
116+
openml.config.server = self.production_server
117+
production_task = openml.tasks.get_task(34536)
114118

115119
def test_get_task(self):
116120
task = openml.tasks.get_task(1)

0 commit comments

Comments
 (0)