Skip to content

Commit d5434d4

Browse files
authored
Merge branch 'develop' into list_datasets_docsting
2 parents 03c9955 + 735026c commit d5434d4

18 files changed

Lines changed: 419 additions & 229 deletions

File tree

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,3 @@ target/
7373
# IDE
7474
.idea
7575
*.swp
76-
77-
# Other
78-
*.pkl

doc/conf.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,8 @@
6363

6464
# General information about the project.
6565
project = u'OpenML'
66-
copyright = u'2014-2016, Matthias Feurer, Andreas Müller, Farzan Majdani, ' \
67-
u'Joaquin Vanschoren and Pieter Gijsbers'
66+
copyright = u'2014-2017, Matthias Feurer, Andreas Müller, Farzan Majdani, ' \
67+
u'Joaquin Vanschoren, Jan van Rijn and Pieter Gijsbers'
6868

6969
# The version info for the project you're documenting, acts as replacement for
7070
# |version| and |release|, also used in various other places throughout the

doc/usage.rst

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -513,7 +513,7 @@ We can now also inspect the flow object which was automatically created:
513513
.. code:: python
514514
515515
>>> flow = openml.flows.get_flow(run.flow_id)
516-
>>> pprint(vars(flow), depth=2)
516+
>>> pprint(vars(flow), depth=2) # doctest: +SKIP
517517
{'binary_format': None,
518518
'binary_md5': None,
519519
'binary_url': None,
@@ -522,7 +522,7 @@ We can now also inspect the flow object which was automatically created:
522522
'custom_name': None,
523523
'dependencies': 'sklearn==0.18.2\nnumpy>=1.6.1\nscipy>=0.9',
524524
'description': 'Automatically created scikit-learn flow.',
525-
'external_version': 'openml==0.6.0dev,sklearn==0.18.2',
525+
'external_version': 'openml==0.6.0,sklearn==0.18.2',
526526
'flow_id': 7245,
527527
'language': 'English',
528528
'model': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',

openml/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
"""Version information."""
22

33
# The following line *must* be the last in the module, exactly as formatted:
4-
__version__ = "0.6.0dev"
4+
__version__ = "0.6.0"

openml/datasets/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,7 @@ def get_data(self, target=None, target_dtype=int, include_row_id=False,
205205

206206
path = self.data_pickle_file
207207
if not os.path.exists(path):
208-
raise ValueError("Cannot find a ndarray file for dataset %s at "
208+
raise ValueError("Cannot find a pickle file for dataset %s at "
209209
"location %s " % (self.name, path))
210210
else:
211211
with open(path, "rb") as fh:
@@ -425,4 +425,4 @@ def _data_features_supported(self):
425425
if self.features[idx].data_type not in ['numeric', 'nominal']:
426426
return False
427427
return True
428-
return True
428+
return True

openml/datasets/functions.py

Lines changed: 16 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,7 @@ def list_datasets(offset=None, size=None, tag=None):
179179
def _list_datasets(api_call):
180180
# TODO add proper error handling here!
181181
xml_string = _perform_api_call(api_call)
182-
datasets_dict = xmltodict.parse(xml_string)
182+
datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))
183183

184184
# Minimalistic check if the XML is useful
185185
assert type(datasets_dict['oml:data']['oml:dataset']) == list, \
@@ -337,9 +337,6 @@ def _get_dataset_description(did_cache_dir, dataset_id):
337337
description = xmltodict.parse(dataset_xml)[
338338
"oml:data_set_description"]
339339

340-
with io.open(description_file, "w", encoding='utf8') as fh:
341-
fh.write(dataset_xml)
342-
343340
return description
344341

345342

@@ -418,7 +415,7 @@ def _get_dataset_features(did_cache_dir, dataset_id):
418415
with io.open(features_file, "w", encoding='utf8') as fh:
419416
fh.write(features_xml)
420417

421-
features = xmltodict.parse(features_xml)["oml:data_features"]
418+
features = xmltodict.parse(features_xml, force_list=('oml:feature',))["oml:data_features"]
422419

423420
return features
424421

@@ -454,7 +451,7 @@ def _get_dataset_qualities(did_cache_dir, dataset_id):
454451
with io.open(qualities_file, "w", encoding='utf8') as fh:
455452
fh.write(qualities_xml)
456453

457-
qualities = xmltodict.parse(qualities_xml)['oml:data_qualities']
454+
qualities = xmltodict.parse(qualities_xml, force_list=('oml:quality',))['oml:data_qualities']
458455

459456
return qualities
460457

@@ -479,13 +476,17 @@ def _create_dataset_cache_directory(dataset_id):
479476
str
480477
Path of the created dataset cache directory.
481478
"""
482-
dataset_cache_dir = os.path.join(config.get_cache_directory(), "datasets",
483-
str(dataset_id))
484-
try:
485-
os.makedirs(dataset_cache_dir)
486-
except (OSError, IOError):
487-
# TODO add debug information!
479+
dataset_cache_dir = os.path.join(
480+
config.get_cache_directory(),
481+
"datasets",
482+
str(dataset_id),
483+
)
484+
if os.path.exists(dataset_cache_dir) and os.path.isdir(dataset_cache_dir):
488485
pass
486+
elif os.path.exists(dataset_cache_dir) and not os.path.isdir(dataset_cache_dir):
487+
raise ValueError('Dataset cache dir exists but is not a directory!')
488+
else:
489+
os.makedirs(dataset_cache_dir)
489490
return dataset_cache_dir
490491

491492

@@ -498,13 +499,10 @@ def _remove_dataset_cache_dir(did_cache_dir):
498499
----------
499500
"""
500501
try:
501-
os.rmdir(did_cache_dir)
502+
shutil.rmtree(did_cache_dir)
502503
except (OSError, IOError):
503-
try:
504-
shutil.rmtree(did_cache_dir)
505-
except (OSError, IOError):
506-
raise ValueError('Cannot remove faulty dataset cache directory %s.'
507-
'Please do this manually!' % did_cache_dir)
504+
raise ValueError('Cannot remove faulty dataset cache directory %s.'
505+
'Please do this manually!' % did_cache_dir)
508506

509507

510508
def _create_dataset_from_description(description, features, qualities, arff_file):

openml/evaluations/functions.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -61,21 +61,17 @@ def _list_evaluations(api_call):
6161

6262
xml_string = _perform_api_call(api_call)
6363

64-
evals_dict = xmltodict.parse(xml_string)
64+
evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',))
6565
# Minimalistic check if the XML is useful
6666
if 'oml:evaluations' not in evals_dict:
6767
raise ValueError('Error in return XML, does not contain "oml:evaluations": %s'
6868
% str(evals_dict))
6969

70-
if isinstance(evals_dict['oml:evaluations']['oml:evaluation'], list):
71-
evals_list = evals_dict['oml:evaluations']['oml:evaluation']
72-
elif isinstance(evals_dict['oml:evaluations']['oml:evaluation'], dict):
73-
evals_list = [evals_dict['oml:evaluations']['oml:evaluation']]
74-
else:
75-
raise TypeError()
70+
assert type(evals_dict['oml:evaluations']['oml:evaluation']) == list, \
71+
type(evals_dict['oml:evaluations'])
7672

7773
evals = dict()
78-
for eval_ in evals_list:
74+
for eval_ in evals_dict['oml:evaluations']['oml:evaluation']:
7975
run_id = int(eval_['oml:run_id'])
8076
array_data = None
8177
if 'oml:array_data' in eval_:

openml/flows/functions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def flow_exists(name, external_version):
107107
def _list_flows(api_call):
108108
# TODO add proper error handling here!
109109
xml_string = _perform_api_call(api_call)
110-
flows_dict = xmltodict.parse(xml_string)
110+
flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))
111111

112112
# Minimalistic check if the XML is useful
113113
assert type(flows_dict['oml:flows']['oml:flow']) == list, \

openml/runs/functions.py

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -583,9 +583,6 @@ def get_run(run_id):
583583

584584
run = _create_run_from_xml(run_xml)
585585

586-
with io.open(run_file, "w", encoding='utf8') as fh:
587-
fh.write(run_xml)
588-
589586
return run
590587

591588

@@ -680,10 +677,18 @@ def _create_run_from_xml(xml):
680677
'description XML' % run_id)
681678

682679
if 'predictions' not in files:
683-
# JvR: actually, I am not sure whether this error should be raised.
684-
# a run can consist without predictions. But for now let's keep it
685-
raise ValueError('No prediction files for run %d in run '
686-
'description XML' % run_id)
680+
task = openml.tasks.get_task(task_id)
681+
if task.task_type_id == 8:
682+
raise NotImplementedError(
683+
'Subgroup discovery tasks are not yet supported.'
684+
)
685+
else:
686+
# JvR: actually, I am not sure whether this error should be raised.
687+
# a run can consist without predictions. But for now let's keep it
688+
# Matthias: yes, it should stay as long as we do not really handle
689+
# this stuff
690+
raise ValueError('No prediction files for run %d in run '
691+
'description XML' % run_id)
687692

688693
tags = openml.utils.extract_xml_tags('oml:tag', run)
689694

@@ -702,14 +707,17 @@ def _create_run_from_xml(xml):
702707

703708

704709
def _create_trace_from_description(xml):
705-
result_dict = xmltodict.parse(xml)['oml:trace']
710+
result_dict = xmltodict.parse(xml, force_list=('oml:trace_iteration',))['oml:trace']
706711

707712
run_id = result_dict['oml:run_id']
708713
trace = dict()
709714

710715
if 'oml:trace_iteration' not in result_dict:
711716
raise ValueError('Run does not contain valid trace. ')
712717

718+
assert type(result_dict['oml:trace_iteration']) == list, \
719+
type(result_dict['oml:trace_iteration'])
720+
713721
for itt in result_dict['oml:trace_iteration']:
714722
repeat = int(itt['oml:repeat'])
715723
fold = int(itt['oml:fold'])
@@ -857,7 +865,7 @@ def _list_runs(api_call):
857865

858866
xml_string = _perform_api_call(api_call)
859867

860-
runs_dict = xmltodict.parse(xml_string)
868+
runs_dict = xmltodict.parse(xml_string, force_list=('oml:run',))
861869
# Minimalistic check if the XML is useful
862870
if 'oml:runs' not in runs_dict:
863871
raise ValueError('Error in return XML, does not contain "oml:runs": %s'
@@ -872,15 +880,11 @@ def _list_runs(api_call):
872880
'"http://openml.org/openml": %s'
873881
% str(runs_dict))
874882

875-
if isinstance(runs_dict['oml:runs']['oml:run'], list):
876-
runs_list = runs_dict['oml:runs']['oml:run']
877-
elif isinstance(runs_dict['oml:runs']['oml:run'], dict):
878-
runs_list = [runs_dict['oml:runs']['oml:run']]
879-
else:
880-
raise TypeError()
883+
assert type(runs_dict['oml:runs']['oml:run']) == list, \
884+
type(runs_dict['oml:runs'])
881885

882886
runs = dict()
883-
for run_ in runs_list:
887+
for run_ in runs_dict['oml:runs']['oml:run']:
884888
run_id = int(run_['oml:run_id'])
885889
run = {'run_id': run_id,
886890
'task_id': int(run_['oml:task_id']),

openml/setups/functions.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ def _list_setups(api_call):
116116

117117
xml_string = openml._api_calls._perform_api_call(api_call)
118118

119-
setups_dict = xmltodict.parse(xml_string)
119+
setups_dict = xmltodict.parse(xml_string, force_list=('oml:setup',))
120120
# Minimalistic check if the XML is useful
121121
if 'oml:setups' not in setups_dict:
122122
raise ValueError('Error in return XML, does not contain "oml:setups": %s'
@@ -131,15 +131,11 @@ def _list_setups(api_call):
131131
'"http://openml.org/openml": %s'
132132
% str(setups_dict))
133133

134-
if isinstance(setups_dict['oml:setups']['oml:setup'], list):
135-
setups_list = setups_dict['oml:setups']['oml:setup']
136-
elif isinstance(setups_dict['oml:setups']['oml:setup'], dict):
137-
setups_list = [setups_dict['oml:setups']['oml:setup']]
138-
else:
139-
raise TypeError()
134+
assert type(setups_dict['oml:setups']['oml:setup']) == list, \
135+
type(setups_dict['oml:setups'])
140136

141137
setups = dict()
142-
for setup_ in setups_list:
138+
for setup_ in setups_dict['oml:setups']['oml:setup']:
143139
# making it a dict to give it the right format
144140
current = _create_setup_from_xml({'oml:setup_parameters': setup_})
145141
setups[current.setup_id] = current

0 commit comments

Comments
 (0)