Skip to content

Commit 8a8c6dd

Browse files
authored
Merge branch 'develop' into dataupload
2 parents fc91146 + ba7b2eb commit 8a8c6dd

9 files changed

Lines changed: 807 additions & 13 deletions

File tree

doc/usage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ We can filter the list of tasks to only contain datasets with more than
143143
>>> filtered_tasks = tasks.query('NumberOfInstances > 500 and NumberOfInstances < 1000')
144144
>>> print(list(filtered_tasks.index)) # doctest: +SKIP
145145
[2, 11, 15, 29, 37, 41, 49, 53, ..., 146597, 146600, 146605]
146-
>>> print(len(filtered_tasks))
146+
>>> print(len(filtered_tasks)) # doctest: +SKIP
147147
210
148148
149149
Then, we can further restrict the tasks to all have the same resampling

openml/runs/functions.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,11 @@ def get_run(run_id):
638638
run : OpenMLRun
639639
Run corresponding to ID, fetched from the server.
640640
"""
641-
run_file = os.path.join(config.get_cache_directory(), "runs",
642-
"run_%d.xml" % run_id)
641+
run_dir = os.path.join(config.get_cache_directory(), "runs", str(run_id))
642+
run_file = os.path.join(run_dir, "description.xml")
643+
644+
if not os.path.exists(run_dir):
645+
os.makedirs(run_dir)
643646

644647
try:
645648
return _get_cached_run(run_id)
@@ -667,7 +670,7 @@ def _create_run_from_xml(xml, from_server=True):
667670
run : OpenMLRun
668671
New run object representing run_xml.
669672
"""
670-
673+
671674
def obtain_field(xml_obj, fieldname, from_server, cast=None):
672675
# this function can be used to check whether a field is present in an object.
673676
# if it is not present, either returns None or throws an error (this is
@@ -694,7 +697,6 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):
694697
else:
695698
task_evaluation_measure = None
696699

697-
698700
flow_id = int(run['oml:flow_id'])
699701
flow_name = obtain_field(run, 'oml:flow_name', from_server)
700702
setup_id = obtain_field(run, 'oml:setup_id', from_server, cast=int)
@@ -872,10 +874,9 @@ def _create_trace_from_arff(arff_obj):
872874
def _get_cached_run(run_id):
873875
"""Load a run from the cache."""
874876
cache_dir = config.get_cache_directory()
875-
run_cache_dir = os.path.join(cache_dir, "runs")
877+
run_cache_dir = os.path.join(cache_dir, "runs", str(run_id))
876878
try:
877-
run_file = os.path.join(run_cache_dir,
878-
"run_%d.xml" % int(run_id))
879+
run_file = os.path.join(run_cache_dir, "description.xml")
879880
with io.open(run_file, encoding='utf8') as fh:
880881
run = _create_run_from_xml(xml=fh.read())
881882
return run

openml/setups/functions.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
from collections import OrderedDict
22

3+
import io
34
import openml
5+
import os
46
import xmltodict
57

8+
from .. import config
69
from .setup import OpenMLSetup, OpenMLParameter
710
from openml.flows import flow_exists
811

@@ -54,8 +57,23 @@ def setup_exists(flow, model=None):
5457
return False
5558

5659

60+
def _get_cached_setup(setup_id):
    """Load a setup from the local cache.

    Parameters
    ----------
    setup_id : int
        OpenML id of the setup whose cached description should be loaded.

    Returns
    -------
    OpenMLSetup
        The setup object built from the cached ``description.xml``.

    Raises
    ------
    openml.exceptions.OpenMLCacheException
        If the cached description file cannot be opened or read.
    """
    setup_file = os.path.join(config.get_cache_directory(), "setups",
                              str(setup_id), "description.xml")

    # Only the file read is guarded: a missing or unreadable file is what
    # "not cached" means. Parsing problems are left to surface unchanged.
    try:
        with io.open(setup_file, encoding='utf8') as fh:
            setup_xml = fh.read()
    except (OSError, IOError):
        # %s (not %d) so the cache exception is raised whatever the type of
        # setup_id; the path above already accepts anything via str().
        raise openml.exceptions.OpenMLCacheException(
            "Setup file for setup id %s not cached" % setup_id)

    return _create_setup_from_xml(xmltodict.parse(setup_xml))
73+
74+
5775
def get_setup(setup_id):
58-
'''
76+
"""
5977
Downloads the setup (configuration) description from OpenML
6078
and returns a structured object
6179
@@ -68,9 +86,22 @@ def get_setup(setup_id):
6886
-------
6987
OpenMLSetup
7088
an initialized openml setup object
71-
'''
72-
result = openml._api_calls._perform_api_call('/setup/%d' %setup_id)
73-
result_dict = xmltodict.parse(result)
89+
"""
90+
setup_dir = os.path.join(config.get_cache_directory(), "setups", str(setup_id))
91+
setup_file = os.path.join(setup_dir, "description.xml")
92+
93+
if not os.path.exists(setup_dir):
94+
os.makedirs(setup_dir)
95+
96+
try:
97+
return _get_cached_setup(setup_id)
98+
99+
except (openml.exceptions.OpenMLCacheException):
100+
setup_xml = openml._api_calls._perform_api_call('/setup/%d' % setup_id)
101+
with io.open(setup_file, "w", encoding='utf8') as fh:
102+
fh.write(setup_xml)
103+
104+
result_dict = xmltodict.parse(setup_xml)
74105
return _create_setup_from_xml(result_dict)
75106

76107

@@ -212,6 +243,7 @@ def _to_dict(flow_id, openml_parameter_settings):
212243

213244
return xml
214245

246+
215247
def _create_setup_from_xml(result_dict):
216248
'''
217249
Turns an API xml result into a OpenMLSetup object

openml/utils.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import xmltodict
12
import six
3+
from ._api_calls import _perform_api_call
24

35
from openml.exceptions import OpenMLServerException
46

@@ -40,6 +42,55 @@ def extract_xml_tags(xml_tag_name, node, allow_none=True):
4042
else:
4143
raise ValueError("Could not find tag '%s' in node '%s'" %
4244
(xml_tag_name, str(node)))
45+
46+
47+
def _tag_entity(entity_type, entity_id, tag, untag=False):
48+
"""Function that tags or untags a given entity on OpenML. As the OpenML
49+
API tag functions all consist of the same format, this function covers
50+
all entity types (currently: dataset, task, flow, setup, run). Could
51+
be used in a partial to provide dataset_tag, dataset_untag, etc.
52+
53+
Parameters
54+
----------
55+
entity_type : str
56+
Name of the entity to tag (e.g., run, flow, data)
57+
58+
entity_id : int
59+
OpenML id of the entity
60+
61+
tag : str
62+
The tag
63+
64+
untag : bool
65+
Set to true if needed to untag, rather than tag
66+
67+
Returns
68+
-------
69+
tags : list
70+
List of tags that the entity is (still) tagged with
71+
"""
72+
legal_entities = {'data', 'task', 'flow', 'setup', 'run'}
73+
if entity_type not in legal_entities:
74+
raise ValueError('Can\'t tag a %s' %entity_type)
75+
76+
uri = '%s/tag' %entity_type
77+
main_tag = 'oml:%s_tag' %entity_type
78+
if untag:
79+
uri = '%s/untag' %entity_type
80+
main_tag = 'oml:%s_untag' %entity_type
81+
82+
83+
post_variables = {'%s_id'%entity_type: entity_id, 'tag': tag}
84+
result_xml = _perform_api_call(uri, post_variables)
85+
86+
result = xmltodict.parse(result_xml, force_list={'oml:tag'})[main_tag]
87+
88+
if 'oml:tag' in result:
89+
return result['oml:tag']
90+
else:
91+
# no tags, return empty list
92+
return []
93+
4394

4495
def list_all(listing_call, batch_size=10000, *args, **filters):
4596
"""Helper to handle paged listing requests.

0 commit comments

Comments
 (0)