Skip to content

Commit 19223f9

Browse files
authored
Merge branch 'develop' into joaquinvanschoren-patch-1
2 parents 24a39e9 + b30192a commit 19223f9

33 files changed

Lines changed: 2410 additions & 1776 deletions

examples/OpenMLDemo.ipynb

Lines changed: 0 additions & 703 deletions
This file was deleted.

examples/OpenML_Tutorial.ipynb

Lines changed: 1344 additions & 0 deletions
Large diffs are not rendered by default.

examples/PyOpenML.ipynb

Lines changed: 0 additions & 862 deletions
This file was deleted.

openml/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@
2222
from . import runs
2323
from . import flows
2424
from . import setups
25+
from . import study
26+
from . import evaluations
2527
from .runs import OpenMLRun
2628
from .tasks import OpenMLTask, OpenMLSplit
2729
from .flows import OpenMLFlow
2830

29-
__version__ = "0.4.0dev"
31+
from .__version__ import __version__
3032

3133

3234
def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None,
@@ -66,5 +68,6 @@ def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None,
6668

6769

6870
__all__ = ['OpenMLDataset', 'OpenMLDataFeature', 'OpenMLRun',
69-
'OpenMLSplit', 'datasets', 'OpenMLTask', 'OpenMLFlow',
71+
'OpenMLSplit', 'OpenMLEvaluation', 'OpenMLSetup',
72+
'OpenMLTask', 'OpenMLFlow', 'datasets', 'evaluations',
7073
'config', 'runs', 'flows', 'tasks', 'setups']

openml/__version__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Version information."""
2+
3+
# The following line *must* be the last in the module, exactly as formatted:
4+
__version__ = "0.5.0dev"

openml/_api_calls.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ def _read_url(url, data=None):
117117
warnings.warn('Received uncompressed content from OpenML for %s.' % url)
118118
return response.text
119119

120+
120121
def _parse_server_exception(response):
121122
# OpenML has a sophisticated error system
122123
# where information about failures is provided. try to parse this

openml/datasets/dataset.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
3939
row_id_attribute=None, ignore_attribute=None,
4040
version_label=None, citation=None, tag=None, visibility=None,
4141
original_data_url=None, paper_url=None, update_comment=None,
42-
md5_checksum=None, data_file=None, features=None):
42+
md5_checksum=None, data_file=None, features=None, qualities=None):
4343
# Attributes received by querying the RESTful API
4444
self.dataset_id = int(dataset_id) if dataset_id is not None else None
4545
self.name = name
@@ -74,6 +74,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
7474
self.md5_cheksum = md5_checksum
7575
self.data_file = data_file
7676
self.features = None
77+
self.qualities = None
7778

7879
if features is not None:
7980
self.features = {}
@@ -87,6 +88,12 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
8788
raise ValueError('Data features not provided in right order')
8889
self.features[feature.index] = feature
8990

91+
if qualities is not None:
92+
self.qualities = {}
93+
for idx, xmlquality in enumerate(qualities['oml:quality']):
94+
name = xmlquality['oml:name']
95+
value = xmlquality['oml:value']
96+
self.qualities[name] = value
9097

9198
if data_file is not None:
9299
if self._data_features_supported():

openml/datasets/functions.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def _get_cached_dataset(dataset_id):
7575
description = _get_cached_dataset_description(dataset_id)
7676
arff_file = _get_cached_dataset_arff(dataset_id)
7777
features = _get_cached_dataset_features(dataset_id)
78-
dataset = _create_dataset_from_description(description, features, arff_file)
78+
qualities = _get_cached_dataset_qualities(dataset_id)
79+
dataset = _create_dataset_from_description(description, features, qualities, arff_file)
7980

8081
return dataset
8182

@@ -107,6 +108,19 @@ def _get_cached_dataset_features(dataset_id):
107108
"cached" % dataset_id)
108109

109110

111+
def _get_cached_dataset_qualities(dataset_id):
    """Load the cached qualities description of a dataset.

    Reads ``qualities.xml`` from the ``datasets/<dataset_id>`` folder inside
    the configured cache directory and parses it with ``xmltodict``.

    Parameters
    ----------
    dataset_id : int
        Id of the dataset whose cached qualities should be loaded.

    Returns
    -------
    The parsed content found under the ``"oml:data_qualities"`` key of the
    cached XML document.

    Raises
    ------
    OpenMLCacheException
        If the qualities file cannot be opened or read
        (``IOError`` / ``OSError``).
    """
    cache_dir = config.get_cache_directory()
    did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
    qualities_file = os.path.join(did_cache_dir, "qualities.xml")
    try:
        with io.open(qualities_file, encoding='utf8') as fh:
            qualities_xml = fh.read()
        return xmltodict.parse(qualities_xml)["oml:data_qualities"]
    except (IOError, OSError):
        # A missing or unreadable file is reported as "not cached".
        raise OpenMLCacheException("Dataset qualities for dataset id %d not "
                                   "cached" % dataset_id)
122+
123+
110124
def _get_cached_dataset_arff(dataset_id):
111125
cache_dir = config.get_cache_directory()
112126
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
@@ -272,7 +286,7 @@ def get_dataset(dataset_id):
272286
_remove_dataset_cache_dir(did_cache_dir)
273287
raise e
274288

275-
dataset = _create_dataset_from_description(description, features, arff_file)
289+
dataset = _create_dataset_from_description(description, features, qualities, arff_file)
276290
return dataset
277291

278292

@@ -470,7 +484,7 @@ def _remove_dataset_cache_dir(did_cache_dir):
470484
'Please do this manually!' % did_cache_dir)
471485

472486

473-
def _create_dataset_from_description(description, features, arff_file):
487+
def _create_dataset_from_description(description, features, qualities, arff_file):
474488
"""Create a dataset object from a description dict.
475489
476490
Parameters
@@ -510,5 +524,6 @@ def _create_dataset_from_description(description, features, arff_file):
510524
description.get("oml:update_comment"),
511525
description.get("oml:md5_checksum"),
512526
data_file=arff_file,
513-
features=features)
527+
features=features,
528+
qualities=qualities)
514529
return dataset

openml/evaluations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .evaluation import OpenMLEvaluation
2+
from .functions import list_evaluations

openml/evaluations/evaluation.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
2+
class OpenMLEvaluation(object):
    """Meta-information about a run / evaluation combination.

    Contains all meta-information about a run / evaluation combination,
    according to the evaluation/list function.

    Parameters
    ----------
    run_id : int
    task_id : int
    setup_id : int
    flow_id : int
    flow_name : str
    data_id : int
    data_name : str
        the name of the dataset
    function : str
        the evaluation function of this item (e.g., accuracy)
    upload_time : str
        the time of evaluation
    value : float
        the value of this evaluation
    array_data : str, optional
        list of information per class (e.g., in case of precision, auroc,
        recall); defaults to None
    """

    def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
                 data_id, data_name, function, upload_time, value,
                 array_data=None):
        # Arguments are stored as given; no conversion or validation is done.
        self.run_id = run_id
        self.task_id = task_id
        self.setup_id = setup_id
        self.flow_id = flow_id
        self.flow_name = flow_name
        self.data_id = data_id
        self.data_name = data_name
        self.function = function
        self.upload_time = upload_time
        self.value = value
        self.array_data = array_data

    def __repr__(self):
        # Short, debugging-oriented summary of the identifying fields.
        return ('OpenMLEvaluation(run_id=%r, task_id=%r, setup_id=%r, '
                'flow_id=%r, function=%r, value=%r)'
                % (self.run_id, self.task_id, self.setup_id, self.flow_id,
                   self.function, self.value))
40+

0 commit comments

Comments
 (0)