Skip to content

Commit c7816ea

Browse files
authored
Merge pull request #263 from openml/develop
Dump development into master
2 parents 9ea6624 + 3231e63 commit c7816ea

44 files changed

Lines changed: 22853 additions & 605 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.travis.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ env:
1515
- TEST_DIR=/tmp/test_dir/
1616
- MODULE=openml
1717
matrix:
18-
- DISTRIB="conda" PYTHON_VERSION="2.7" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.21" SKLEARN_VERSION="0.18"
19-
- DISTRIB="conda" PYTHON_VERSION="3.4" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18"
20-
- DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18"
18+
- DISTRIB="conda" PYTHON_VERSION="2.7" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.21" SKLEARN_VERSION="0.18.1"
19+
- DISTRIB="conda" PYTHON_VERSION="3.4" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18.1"
20+
- DISTRIB="conda" PYTHON_VERSION="3.5" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18.1"
21+
- DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.25.2" SKLEARN_VERSION="0.18.1"
22+
2123
install: source ci_scripts/install.sh
2224
script: bash ci_scripts/test.sh
2325
after_success: source ci_scripts/success.sh

ci_scripts/install.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ source activate testenv
3131

3232
pip install matplotlib jupyter notebook nbconvert nbformat jupyter_client ipython ipykernel
3333
if [[ "$COVERAGE" == "true" ]]; then
34-
pip install coverage coveralls
34+
pip install codecov
3535
fi
3636

3737
python --version

ci_scripts/success.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ if [[ "$COVERAGE" == "true" ]]; then
99
# very reliable but we don't want travis to report a failure
1010
# in the github UI just because the coverage report failed to
1111
# be published.
12-
coveralls || echo "Coveralls upload failed"
12+
codecov || echo "Codecov upload failed"
1313
fi

openml/__init__.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,55 @@
1616
"""
1717
from . import config
1818

19-
from .datasets import OpenMLDataset
19+
from .datasets import OpenMLDataset, OpenMLDataFeature
2020
from . import datasets
21+
from . import tasks
2122
from . import runs
2223
from . import flows
24+
from . import setups
2325
from .runs import OpenMLRun
2426
from .tasks import OpenMLTask, OpenMLSplit
2527
from .flows import OpenMLFlow
2628

29+
__version__ = "0.4.0dev"
2730

28-
__version__ = "0.2.1"
2931

30-
__all__ = ['OpenMLDataset', 'OpenMLRun', 'OpenMLSplit', 'datasets',
31-
'OpenMLTask', 'OpenMLFlow', 'config', 'runs', 'flows']
32+
def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None,
                   run_ids=None):
    """
    Populate a cache for offline and parallel usage of the OpenML connector.

    Every id in each given collection is passed, one at a time, to the
    matching getter (``get_task``, ``get_dataset``, ``get_flow``,
    ``get_run``). The values returned by the getters are discarded.
    Presumably the getters store what they download in the local cache;
    that is not visible from this function.

    Parameters
    ----------
    task_ids : iterable
        Task ids to fetch; ``None`` skips tasks.

    dataset_ids : iterable
        Dataset ids to fetch; ``None`` skips datasets.

    flow_ids : iterable
        Flow ids to fetch; ``None`` skips flows.

    run_ids : iterable
        Run ids to fetch; ``None`` skips runs.

    Returns
    -------
    None
    """
    def _fetch_each(entity_ids, fetch):
        # ``None`` means nothing was requested for this kind of entity.
        if entity_ids is None:
            return
        for entity_id in entity_ids:
            fetch(entity_id)

    # The lambdas defer the attribute lookup until an id is actually
    # fetched; the order (tasks, datasets, flows, runs) is preserved.
    _fetch_each(task_ids, lambda i: tasks.functions.get_task(i))
    _fetch_each(dataset_ids, lambda i: datasets.functions.get_dataset(i))
    _fetch_each(flow_ids, lambda i: flows.functions.get_flow(i))
    _fetch_each(run_ids, lambda i: runs.functions.get_run(i))
66+
67+
68+
__all__ = ['OpenMLDataset', 'OpenMLDataFeature', 'OpenMLRun',
69+
'OpenMLSplit', 'datasets', 'OpenMLTask', 'OpenMLFlow',
70+
'config', 'runs', 'flows', 'tasks', 'setups']

openml/_api_calls.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import io
22
import os
33
import requests
4-
import arff
54
import warnings
65

6+
import arff
7+
import xmltodict
8+
79
from . import config
8-
from .exceptions import OpenMLServerError
10+
from .exceptions import OpenMLServerError, OpenMLServerException
911

1012

1113
def _perform_api_call(call, data=None, file_dictionary=None,
@@ -50,6 +52,18 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
5052
return _read_url(url, data)
5153

5254

55+
def _file_id_to_url(file_id, filename=None):
    '''
    Build the download URL for a given file id.

    The part of ``config.server`` that precedes the first ``/api/`` is
    used as the base, followed by ``/data/download/<file_id>``. When
    ``filename`` is given it is appended as a further path segment.
    '''
    base_url = config.server.split('/api/')[0]
    pieces = [base_url, '/data/download/%s' % file_id]
    if filename is not None:
        pieces.extend(['/', filename])
    return ''.join(pieces)
65+
66+
5367
def _read_url_files(url, data=None, file_dictionary=None, file_elements=None):
5468
"""do a post request to url with data, file content of
5569
file_dictionary and sending file_elements as files"""
@@ -80,7 +94,7 @@ def _read_url_files(url, data=None, file_dictionary=None, file_elements=None):
8094
# 'gzip,deflate'
8195
response = requests.post(url, data=data, files=file_elements)
8296
if response.status_code != 200:
83-
raise OpenMLServerError(('Status code: %d\n' % response.status_code) + response.text)
97+
raise _parse_server_exception(response)
8498
if 'Content-Encoding' not in response.headers or \
8599
response.headers['Content-Encoding'] != 'gzip':
86100
warnings.warn('Received uncompressed content from OpenML for %s.' % url)
@@ -97,8 +111,25 @@ def _read_url(url, data=None):
97111
response = requests.post(url, data=data)
98112

99113
if response.status_code != 200:
100-
raise OpenMLServerError(('Status code: %d\n' % response.status_code) + response.text)
114+
raise _parse_server_exception(response)
101115
if 'Content-Encoding' not in response.headers or \
102116
response.headers['Content-Encoding'] != 'gzip':
103117
warnings.warn('Received uncompressed content from OpenML for %s.' % url)
104118
return response.text
119+
120+
def _parse_server_exception(response):
    """Turn a failed server response into an exception object.

    Callers in this module use the result as
    ``raise _parse_server_exception(response)``.

    Parameters
    ----------
    response : object
        The failed response; its ``text`` and ``status_code`` attributes
        are read.

    Returns
    -------
    OpenMLServerException
        Built from the ``oml:code``, ``oml:message`` and (if present)
        ``oml:additional_information`` entries of the ``oml:error``
        document in the response body.

    Raises
    ------
    OpenMLServerError
        If the body cannot be parsed as XML or does not have the expected
        ``oml:error`` structure.
    """
    # OpenML has a sophisticated error system
    # where information about failures is provided. try to parse this
    try:
        server_exception = xmltodict.parse(response.text)
        error = server_exception['oml:error']
        code = int(error['oml:code'])
        message = error['oml:message']
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate. This also covers
        # well-formed XML that is not an ``oml:error`` document, which
        # would otherwise surface as a raw KeyError/TypeError/ValueError.
        raise OpenMLServerError(('Unexpected server error. Please '
                                 'contact the developers!\nStatus code: '
                                 '%d\n' % response.status_code) + response.text)

    additional = None
    if 'oml:additional_information' in error:
        additional = error['oml:additional_information']
    return OpenMLServerException(code, message, additional)

openml/config.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
"""
22
Stores module level information like the API key, cache directory and the server.
33
"""
4-
import os
5-
import sys
64
import logging
5+
import os
6+
7+
from six import StringIO
8+
from six.moves import configparser
9+
710

811
logger = logging.getLogger(__name__)
912
logging.basicConfig(
@@ -15,12 +18,7 @@
1518
cachedir = ""
1619

1720

18-
if sys.version_info[0] < 3:
19-
import ConfigParser as configparser
20-
from StringIO import StringIO
21-
else:
22-
import configparser
23-
from io import StringIO
21+
2422

2523

2624
def _setup():
@@ -36,6 +34,7 @@ def _setup():
3634
"""
3735
global apikey
3836
global server
37+
global avoid_duplicate_runs
3938
# read config file, create cache directory
4039
try:
4140
os.mkdir(os.path.expanduser('~/.openml'))
@@ -46,6 +45,7 @@ def _setup():
4645
apikey = config.get('FAKE_SECTION', 'apikey')
4746
server = config.get('FAKE_SECTION', 'server')
4847
cache_dir = config.get('FAKE_SECTION', 'cachedir')
48+
avoid_duplicate_runs = config.getboolean('FAKE_SECTION', 'avoid_duplicate_runs')
4949
set_cache_directory(cache_dir)
5050

5151

@@ -84,7 +84,8 @@ def _parse_config():
8484
defaults = {'apikey': apikey,
8585
'server': server,
8686
'verbosity': 0,
87-
'cachedir': os.path.expanduser('~/.openml/cache')}
87+
'cachedir': os.path.expanduser('~/.openml/cache'),
88+
'avoid_duplicate_runs': 'True'}
8889

8990
config_file = os.path.expanduser('~/.openml/config')
9091
config = configparser.RawConfigParser(defaults=defaults)

openml/datasets/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .functions import (list_datasets, check_datasets_active,
22
get_datasets, get_dataset)
33
from .dataset import OpenMLDataset
4+
from .data_feature import OpenMLDataFeature
45

56
__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
6-
'OpenMLDataset', 'list_datasets']
7+
'OpenMLDataset', 'OpenMLDataFeature', 'list_datasets']

openml/datasets/data_feature.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
class OpenMLDataFeature(object):
    """Data Feature (a.k.a. Attribute) object.

    Parameters
    ----------
    index : int
        The index of this feature
    name : str
        Name of the feature
    data_type : str
        can be nominal, numeric, string, date (corresponds to arff)
    nominal_values : list(str)
        list of the possible values, in case of nominal attribute;
        may be None
    number_missing_values : int
        Number of missing values recorded for this feature

    Raises
    ------
    ValueError
        If ``index`` or ``number_missing_values`` is not an int, if
        ``data_type`` is not one of ``LEGAL_DATA_TYPES``, or if
        ``nominal_values`` is neither None nor a list.
    """
    LEGAL_DATA_TYPES = ['nominal', 'numeric', 'string', 'date']

    def __init__(self, index, name, data_type, nominal_values,
                 number_missing_values):
        # isinstance (rather than an exact type comparison) so that
        # subclasses of int / list are accepted as well.
        if not isinstance(index, int):
            raise ValueError('Index is of wrong datatype')
        if data_type not in self.LEGAL_DATA_TYPES:
            raise ValueError('data type should be in %s, found: %s' %
                             (str(self.LEGAL_DATA_TYPES), data_type))
        if nominal_values is not None and not isinstance(nominal_values, list):
            raise ValueError('Nominal_values is of wrong datatype')
        if not isinstance(number_missing_values, int):
            raise ValueError('number_missing_values is of wrong datatype')

        self.index = index
        self.name = str(name)
        self.data_type = str(data_type)
        self.nominal_values = nominal_values
        self.number_missing_values = number_missing_values

    def __str__(self):
        return "[%d - %s (%s)]" % (self.index, self.name, self.data_type)

0 commit comments

Comments
 (0)