Skip to content

Commit 9ca14ec

Browse files
authored
Merge branch 'develop' into issue209
2 parents 929fec1 + a9ca0b8 commit 9ca14ec

22 files changed

Lines changed: 400 additions & 551 deletions

ci_scripts/install.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ source activate testenv
3131

3232
pip install matplotlib jupyter notebook nbconvert nbformat jupyter_client ipython ipykernel
3333
if [[ "$COVERAGE" == "true" ]]; then
34-
pip install coverage coveralls
34+
pip install codecov
3535
fi
3636

3737
python --version

ci_scripts/success.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ if [[ "$COVERAGE" == "true" ]]; then
99
# very reliable but we don't want travis to report a failure
1010
# in the github UI just because the coverage report failed to
1111
# be published.
12-
coveralls || echo "Coveralls upload failed"
12+
codecov || echo "Codecov upload failed"
1313
fi

openml/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,17 @@
1616
"""
1717
from . import config
1818

19-
from .datasets import OpenMLDataset
19+
from .datasets import OpenMLDataset, OpenMLDataFeature
2020
from . import datasets
2121
from . import runs
2222
from . import flows
2323
from .runs import OpenMLRun
2424
from .tasks import OpenMLTask, OpenMLSplit
2525
from .flows import OpenMLFlow
26-
from .utils import ConditionalImputer
2726

2827

2928
__version__ = "0.2.1"
3029

31-
__all__ = ['OpenMLDataset', 'OpenMLRun', 'OpenMLSplit', 'datasets',
32-
'OpenMLTask', 'OpenMLFlow', 'config', 'runs', 'flows']
30+
__all__ = ['OpenMLDataset', 'OpenMLDataFeature', 'OpenMLRun',
31+
'OpenMLSplit', 'datasets', 'OpenMLTask', 'OpenMLFlow',
32+
'config', 'runs', 'flows']

openml/datasets/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
from .data_feature import OpenMLDataFeature
55

66
__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
7-
'OpenMLDataset', 'list_datasets']
7+
'OpenMLDataset', 'OpenMLDataFeature', 'list_datasets']

openml/datasets/data_feature.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ class OpenMLDataFeature(object):
66
----------
77
index : int
88
The index of this feature
9-
name : string
9+
name : str
1010
Name of the feature
11-
data_type : string
11+
data_type : str
1212
can be nominal, numeric, string, date (corresponds to arff)
1313
nominal_values : list(str)
1414
list of the possible values, in case of nominal attribute
@@ -17,17 +17,18 @@ class OpenMLDataFeature(object):
1717
LEGAL_DATA_TYPES = ['nominal', 'numeric', 'string', 'date']
1818

1919
def __init__(self, index, name, data_type, nominal_values, number_missing_values):
20-
assert type(index) is int, "Index is of wrong datatype"
21-
assert type(name) is str, "Name is of wrong datatype"
22-
assert type(data_type) is str, "Data_type is of wrong datatype"
23-
assert data_type in self.LEGAL_DATA_TYPES, "data type should be in %s" %str(self.LEGAL_DATA_TYPES)
24-
if nominal_values is not None:
25-
assert type(nominal_values) is list, "Nominal_values is of wrong datatype"
26-
assert type(number_missing_values) is int, "number_missing_values is of wrong datatype"
20+
if type(index) != int:
21+
raise ValueError('Index is of wrong datatype')
22+
if data_type not in self.LEGAL_DATA_TYPES:
23+
raise ValueError('data type should be in %s, found: %s' %(str(self.LEGAL_DATA_TYPES),data_type))
24+
if nominal_values is not None and type(nominal_values) != list:
25+
raise ValueError('Nominal_values is of wrong datatype')
26+
if type(number_missing_values) != int:
27+
raise ValueError('number_missing_values is of wrong datatype')
2728

2829
self.index = index
29-
self.name = name
30-
self.data_type = data_type
30+
self.name = str(name)
31+
self.data_type = str(data_type)
3132
self.nominal_values = nominal_values
3233
self.number_missing_values = number_missing_values
3334

openml/datasets/dataset.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import io
33
import logging
44
import os
5+
import six
56
import sys
67

78
import arff
@@ -10,7 +11,7 @@
1011
import scipy.sparse
1112
import xmltodict
1213

13-
from ..datasets.data_feature import OpenMLDataFeature
14+
from .data_feature import OpenMLDataFeature
1415
from ..exceptions import PyOpenMLError
1516

1617
if sys.version_info[0] >= 3:
@@ -65,10 +66,14 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
6566
self.default_target_attribute = default_target_attribute
6667
self.row_id_attribute = row_id_attribute
6768
self.ignore_attributes = None
68-
if isinstance(ignore_attribute, str):
69+
if isinstance(ignore_attribute, six.string_types):
6970
self.ignore_attributes = [ignore_attribute]
7071
elif isinstance(ignore_attribute, list):
7172
self.ignore_attributes = ignore_attribute
73+
elif ignore_attribute is None:
74+
pass
75+
else:
76+
raise ValueError('wrong data type for ignore_attribute. Should be list. ')
7277
self.version_label = version_label
7378
self.citation = citation
7479
self.tag = tag
@@ -88,7 +93,8 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
8893
xmlfeature['oml:data_type'],
8994
None, #todo add nominal values (currently not in database)
9095
int(xmlfeature['oml:number_of_missing_values']))
91-
assert idx == feature.index, "Data features not provided in right order"
96+
if idx != feature.index:
97+
raise ValueError('Data features not provided in right order')
9298
self.features[feature.index] = feature
9399

94100

@@ -313,15 +319,40 @@ def retrieve_class_labels(self, target_name='class'):
313319
return None
314320

315321

316-
def get_features_by_type(self, data_type, exclude=None, exclude_ignore_attributes=True, exclude_row_id_attribute=True):
322+
def get_features_by_type(self, data_type, exclude=None,
323+
exclude_ignore_attributes=True,
324+
exclude_row_id_attribute=True):
325+
'''
326+
Returns indices of features of a given type, e.g., all nominal features.
327+
Can use additional parameters to exclude various features by index or ontology.
328+
329+
Parameters
330+
----------
331+
data_type : str
332+
The data type to return (e.g., nominal, numeric, date, string)
333+
exclude : list(int)
334+
Indices to exclude (and adapt the return values as if these indices
335+
are not present)
336+
exclude_ignore_attributes : bool
337+
Whether to exclude the defined ignore attributes (and adapt the
338+
return values as if these indices are not present)
339+
exclude_row_id_attribute : bool
340+
Whether to exclude the defined row id attributes (and adapt the
341+
return values as if these indices are not present)
342+
343+
Returns
344+
-------
345+
result : list
346+
a list of indices that have the specified data type
347+
'''
317348
assert data_type in OpenMLDataFeature.LEGAL_DATA_TYPES, "Illegal feature type requested"
318349
if self.ignore_attributes is not None:
319350
assert type(self.ignore_attributes) is list, "ignore_attributes should be a list"
320351
if self.row_id_attribute is not None:
321352
assert type(self.row_id_attribute) is str, "row id attribute should be a str"
322353
if exclude is not None:
323354
assert type(exclude) is list, "Exclude should be a list"
324-
assert all(isinstance(elem, str) for elem in exclude), "Exclude should be a list of strings"
355+
# assert all(isinstance(elem, str) for elem in exclude), "Exclude should be a list of strings"
325356
to_exclude = []
326357
if exclude is not None:
327358
to_exclude.extend(exclude)

openml/exceptions.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,19 @@ class PyOpenMLError(Exception):
22
def __init__(self, message):
33
super(PyOpenMLError, self).__init__(message)
44

5-
# class for when something is really wrong on the server (result did not parse to dict)
65
class OpenMLServerError(PyOpenMLError):
7-
"""Server didn't respond 200, contains unparsed error."""
6+
"""class for when something is really wrong on the server
7+
(result did not parse to dict), contains unparsed error."""
8+
89
def __init__(self, message):
910
message = "OpenML Server error: " + message
1011
super(OpenMLServerError, self).__init__(message)
1112

12-
# class for when the result of the server was not 200 (e.g., listing call w/o results)
13+
#
1314
class OpenMLServerException(OpenMLServerError):
14-
"""Server didn't respond 200."""
15+
"""exception for when the result of the server was
16+
not 200 (e.g., listing call w/o results). """
17+
1518
def __init__(self, code, message, additional=None):
1619
self.code = code
1720
self.additional = additional

openml/flows/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from .flow import OpenMLFlow
22
from .sklearn_converter import sklearn_to_flow, flow_to_sklearn
3-
from .functions import get_flow, list_flows
3+
from .functions import get_flow, list_flows, flow_exists
44

55
__all__ = ['OpenMLFlow', 'create_flow_from_model', 'get_flow', 'list_flows',
6-
'sklearn_to_flow', 'flow_to_sklearn']
6+
'sklearn_to_flow', 'flow_to_sklearn', 'flow_exists']

openml/flows/flow.py

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -340,58 +340,6 @@ def publish(self):
340340
self.flow_id = int(xmltodict.parse(return_value)['oml:upload_flow']['oml:id'])
341341
return self
342342

343-
def _ensure_flow_exists(self):
344-
""" Checks if a flow exists for the given model and possibly creates it.
345-
346-
If the given flow exists on the server, the flow-id will simply
347-
be returned. Otherwise it will be uploaded to the server.
348-
349-
Returns
350-
-------
351-
flow_id : int
352-
Flow id on the server.
353-
"""
354-
_, flow_id = _check_flow_exists(self.name, self.external_version)
355-
# TODO add numpy and scipy version!
356-
357-
if int(flow_id) == -1:
358-
flow = self.publish()
359-
return int(flow.flow_id)
360-
361-
return int(flow_id)
362-
363-
364-
def _check_flow_exists(name, version):
365-
"""Retrieves the flow id of the flow uniquely identified by name+version.
366-
367-
Parameter
368-
---------
369-
name : string
370-
Name of the flow
371-
version : string
372-
Version information associated with flow.
373-
374-
Returns
375-
-------
376-
flow_exist : int
377-
Flow id or -1 if the flow doesn't exist.
378-
379-
Notes
380-
-----
381-
see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
382-
"""
383-
if not (type(name) is str and len(name) > 0):
384-
raise ValueError('Argument \'name\' should be a non-empty string')
385-
if not (type(version) is str and len(version) > 0):
386-
raise ValueError('Argument \'version\' should be a non-empty string')
387-
388-
xml_response = _perform_api_call("flow/exists",
389-
data={'name': name, 'external_version': version})
390-
391-
xml_dict = xmltodict.parse(xml_response)
392-
flow_id = xml_dict['oml:flow_exists']['oml:id']
393-
return xml_response, flow_id
394-
395343

396344
def _add_if_nonempty(dic, key, value):
397345
if value is not None:

openml/flows/functions.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import xmltodict
2+
import six
23

34
from openml._api_calls import _perform_api_call
45
from . import OpenMLFlow, flow_to_sklearn
@@ -66,10 +67,45 @@ def list_flows(offset=None, size=None, tag=None):
6667
if tag is not None:
6768
api_call += "/tag/%s" % tag
6869

69-
return _list_datasets(api_call)
70+
return _list_flows(api_call)
7071

7172

72-
def _list_datasets(api_call):
73+
def flow_exists(name, external_version):
    """Retrieve the id of the flow identified by name + external_version.

    Parameters
    ----------
    name : str
        Name of the flow. Must be a non-empty string.
    external_version : str
        Version information associated with the flow. Must be a non-empty
        string.

    Returns
    -------
    flow_id : int or bool
        The flow id parsed from the server response if it is positive,
        ``False`` otherwise.

    Raises
    ------
    ValueError
        If ``name`` or ``external_version`` is not a non-empty string.

    Notes
    -----
    see http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
    """
    if not (isinstance(name, six.string_types) and len(name) > 0):
        raise ValueError('Argument \'name\' should be a non-empty string')
    # Check external_version itself (not name) so that a non-string version
    # is rejected with ValueError instead of failing in len() or reaching
    # the server.
    if not (isinstance(external_version, six.string_types)
            and len(external_version) > 0):
        raise ValueError('Argument \'version\' should be a non-empty string')

    xml_response = _perform_api_call(
        "flow/exists",
        data={'name': name, 'external_version': external_version})

    result_dict = xmltodict.parse(xml_response)
    flow_id = int(result_dict['oml:flow_exists']['oml:id'])
    # Non-positive ids are reported to the caller as False.
    return flow_id if flow_id > 0 else False
107+
108+
def _list_flows(api_call):
73109
# TODO add proper error handling here!
74110
xml_string = _perform_api_call(api_call)
75111
flows_dict = xmltodict.parse(xml_string)

0 commit comments

Comments
 (0)