Skip to content

Commit 9d56347

Browse files
committed
FIX #151: remove the undocumented private cache directory feature
1 parent a140586 commit 9d56347

6 files changed

Lines changed: 90 additions & 147 deletions

File tree

openml/config.py

Lines changed: 7 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""
2-
Stores module level information like the API key, cache director, private
3-
directory and the server.
2+
Stores module level information like the API key, cache directory and the server.
43
"""
54
import os
65
import sys
@@ -14,7 +13,6 @@
1413
server = "https://www.openml.org/api/v1/xml"
1514
apikey = ""
1615
cachedir = ""
17-
privatedir = ""
1816

1917

2018
if sys.version_info[0] < 3:
@@ -47,57 +45,35 @@ def _setup():
4745
config = _parse_config()
4846
apikey = config.get('FAKE_SECTION', 'apikey')
4947
server = config.get('FAKE_SECTION', 'server')
50-
private_dir = config.get('FAKE_SECTION', 'private_directory')
5148
cache_dir = config.get('FAKE_SECTION', 'cachedir')
52-
set_cache_directory(cache_dir, private_dir)
49+
set_cache_directory(cache_dir)
5350

5451

55-
def set_cache_directory(cachedir, privatedir=None):
52+
def set_cache_directory(cachedir):
5653
"""Set module-wide cache directory.
5754
5855
Sets the cache directory into which to download datasets, tasks etc.
59-
Also sets the private directory for storing local datasets.
6056
6157
Parameters
6258
----------
6359
cachedir : string
6460
Path to use as cache directory.
6561
66-
privatedir : string
67-
Path containing private datasets, tasks, etc.
68-
6962
See also
7063
--------
7164
get_cache_directory
72-
get_private_directory
7365
"""
74-
if privatedir is None:
75-
privatedir = cachedir
7666

7767
global _cachedir
78-
global _privatedir
7968
_cachedir = cachedir
80-
_privatedir = privatedir
8169

8270
# Set up the cache directories
8371
dataset_cache_dir = os.path.join(cachedir, "datasets")
8472
task_cache_dir = os.path.join(cachedir, "tasks")
8573
run_cache_dir = os.path.join(cachedir, 'runs')
8674

87-
# Set up the private directory
88-
_private_directory_datasets = os.path.join(
89-
privatedir, "datasets")
90-
_private_directory_tasks = os.path.join(
91-
privatedir, "tasks")
92-
_private_directory_runs = os.path.join(
93-
privatedir, "runs")
94-
95-
for dir_ in [cachedir, dataset_cache_dir,
96-
task_cache_dir, run_cache_dir,
97-
privatedir,
98-
_private_directory_datasets,
99-
_private_directory_tasks,
100-
_private_directory_runs]:
75+
76+
for dir_ in [cachedir, dataset_cache_dir, task_cache_dir, run_cache_dir]:
10177
if not os.path.exists(dir_) and not os.path.isdir(dir_):
10278
os.mkdir(dir_)
10379

@@ -108,8 +84,7 @@ def _parse_config():
10884
defaults = {'apikey': apikey,
10985
'server': server,
11086
'verbosity': 0,
111-
'cachedir': os.path.expanduser('~/.openml/cache'),
112-
'private_directory': os.path.expanduser('~/.openml/private')}
87+
'cachedir': os.path.expanduser('~/.openml/cache')}
11388

11489
config_file = os.path.expanduser('~/.openml/config')
11590
config = configparser.RawConfigParser(defaults=defaults)
@@ -147,26 +122,10 @@ def get_cache_directory():
147122
See also
148123
--------
149124
set_cache_directory
150-
get_private_directory
151125
"""
152126
return _cachedir
153127

154128

155-
def get_private_directory():
156-
"""Get the current private directory.
157-
158-
Returns
159-
-------
160-
privatecir : string
161-
The current private directory.
162-
163-
See also
164-
--------
165-
set_cache_directory
166-
get_cache_directory
167-
"""
168-
return _privatedir
169-
170-
__all__ = ["set_cache_directory", 'get_cache_directory', 'get_private_directory']
129+
__all__ = ["set_cache_directory", 'get_cache_directory']
171130

172131
_setup()

openml/datasets/functions.py

Lines changed: 49 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,28 @@ def _list_cached_datasets():
2323
"""
2424
datasets = []
2525

26-
for dataset_cache in [config.get_cache_directory(), config.get_private_directory()]:
27-
dataset_cache_dir = os.path.join(dataset_cache, "datasets")
28-
directory_content = os.listdir(dataset_cache_dir)
29-
directory_content.sort()
30-
31-
# Find all dataset ids for which we have downloaded the dataset
32-
# description
33-
for directory_name in directory_content:
34-
# First check if the directory name could be an OpenML dataset id
35-
if not re.match(r"[0-9]*", directory_name):
36-
continue
26+
dataset_cache = config.get_cache_directory()
27+
dataset_cache_dir = os.path.join(dataset_cache, "datasets")
28+
directory_content = os.listdir(dataset_cache_dir)
29+
directory_content.sort()
30+
31+
# Find all dataset ids for which we have downloaded the dataset
32+
# description
33+
for directory_name in directory_content:
34+
# First check if the directory name could be an OpenML dataset id
35+
if not re.match(r"[0-9]*", directory_name):
36+
continue
3737

38-
dataset_id = int(directory_name)
38+
dataset_id = int(directory_name)
3939

40-
directory_name = os.path.join(dataset_cache_dir,
41-
directory_name)
42-
dataset_directory_content = os.listdir(directory_name)
40+
directory_name = os.path.join(dataset_cache_dir,
41+
directory_name)
42+
dataset_directory_content = os.listdir(directory_name)
4343

44-
if "dataset.arff" in dataset_directory_content and \
45-
"description.xml" in dataset_directory_content:
46-
if dataset_id not in datasets:
47-
datasets.append(dataset_id)
44+
if "dataset.arff" in dataset_directory_content and \
45+
"description.xml" in dataset_directory_content:
46+
if dataset_id not in datasets:
47+
datasets.append(dataset_id)
4848

4949
datasets.sort()
5050
return datasets
@@ -79,53 +79,44 @@ def _get_cached_dataset(dataset_id):
7979

8080

8181
def _get_cached_dataset_description(dataset_id):
82-
for cache_dir in [config.get_cache_directory(),
83-
config.get_private_directory()]:
84-
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
85-
description_file = os.path.join(did_cache_dir, "description.xml")
86-
try:
87-
with io.open(description_file, encoding='utf8') as fh:
88-
dataset_xml = fh.read()
89-
except (IOError, OSError):
90-
continue
91-
82+
cache_dir = config.get_cache_directory()
83+
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
84+
description_file = os.path.join(did_cache_dir, "description.xml")
85+
try:
86+
with io.open(description_file, encoding='utf8') as fh:
87+
dataset_xml = fh.read()
9288
return xmltodict.parse(dataset_xml)["oml:data_set_description"]
89+
except (IOError, OSError):
90+
raise OpenMLCacheException(
91+
"Dataset description for dataset id %d not "
92+
"cached" % dataset_id)
9393

94-
raise OpenMLCacheException("Dataset description for dataset id %d not "
95-
"cached" % dataset_id)
9694

9795
def _get_cached_dataset_features(dataset_id):
98-
for cache_dir in [config.get_cache_directory(),
99-
config.get_private_directory()]:
100-
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
101-
features_file = os.path.join(did_cache_dir, "features.xml")
102-
try:
103-
with io.open(features_file, encoding='utf8') as fh:
104-
features_xml = fh.read()
105-
except (IOError, OSError):
106-
continue
107-
108-
return xmltodict.parse(features_xml)["oml:data_features"]
109-
110-
raise OpenMLCacheException("Dataset features for dataset id %d not "
111-
"cached" % dataset_id)
96+
cache_dir = config.get_cache_directory()
97+
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
98+
features_file = os.path.join(did_cache_dir, "features.xml")
99+
try:
100+
with io.open(features_file, encoding='utf8') as fh:
101+
features_xml = fh.read()
102+
return xmltodict.parse(features_xml)["oml:data_features"]
103+
except (IOError, OSError):
104+
raise OpenMLCacheException("Dataset features for dataset id %d not "
105+
"cached" % dataset_id)
112106

113107

114108
def _get_cached_dataset_arff(dataset_id):
115-
for cache_dir in [config.get_cache_directory(),
116-
config.get_private_directory()]:
117-
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
118-
output_file = os.path.join(did_cache_dir, "dataset.arff")
109+
cache_dir = config.get_cache_directory()
110+
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
111+
output_file = os.path.join(did_cache_dir, "dataset.arff")
119112

120-
try:
121-
with io.open(output_file, encoding='utf8'):
122-
pass
123-
return output_file
124-
except (OSError, IOError):
125-
continue
126-
127-
raise OpenMLCacheException("ARFF file for dataset id %d not "
128-
"cached" % dataset_id)
113+
try:
114+
with io.open(output_file, encoding='utf8'):
115+
pass
116+
return output_file
117+
except (OSError, IOError):
118+
raise OpenMLCacheException("ARFF file for dataset id %d not "
119+
"cached" % dataset_id)
129120

130121

131122
def list_datasets(offset=None, size=None, tag=None):

openml/runs/functions.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -260,21 +260,18 @@ def _create_run_from_xml(xml):
260260

261261
def _get_cached_run(run_id):
262262
"""Load a run from the cache."""
263-
for cache_dir in [config.get_cache_directory(),
264-
config.get_private_directory()]:
265-
run_cache_dir = os.path.join(cache_dir, "runs")
266-
try:
267-
run_file = os.path.join(run_cache_dir,
268-
"run_%d.xml" % int(run_id))
269-
with io.open(run_file, encoding='utf8') as fh:
270-
run = _create_task_from_xml(xml=fh.read())
271-
return run
272-
273-
except (OSError, IOError):
274-
continue
275-
276-
raise OpenMLCacheException("Run file for run id %d not "
277-
"cached" % run_id)
263+
cache_dir = config.get_cache_directory()
264+
run_cache_dir = os.path.join(cache_dir, "runs")
265+
try:
266+
run_file = os.path.join(run_cache_dir,
267+
"run_%d.xml" % int(run_id))
268+
with io.open(run_file, encoding='utf8') as fh:
269+
run = _create_task_from_xml(xml=fh.read())
270+
return run
271+
272+
except (OSError, IOError):
273+
raise OpenMLCacheException("Run file for run id %d not "
274+
"cached" % run_id)
278275

279276

280277
def list_runs(offset=None, size=None, id=None, task=None,

openml/tasks/functions.py

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,39 +14,37 @@
1414

1515
def _get_cached_tasks():
1616
tasks = OrderedDict()
17-
for cache_dir in [config.get_cache_directory(), config.get_private_directory()]:
17+
cache_dir = config.get_cache_directory()
1818

19-
task_cache_dir = os.path.join(cache_dir, "tasks")
20-
directory_content = os.listdir(task_cache_dir)
21-
directory_content.sort()
19+
task_cache_dir = os.path.join(cache_dir, "tasks")
20+
directory_content = os.listdir(task_cache_dir)
21+
directory_content.sort()
2222

23-
# Find all dataset ids for which we have downloaded the dataset
24-
# description
23+
# Find all dataset ids for which we have downloaded the dataset
24+
# description
2525

26-
for filename in directory_content:
27-
if not re.match(r"[0-9]*", filename):
28-
continue
26+
for filename in directory_content:
27+
if not re.match(r"[0-9]*", filename):
28+
continue
2929

30-
tid = int(filename)
31-
tasks[tid] = _get_cached_task(tid)
30+
tid = int(filename)
31+
tasks[tid] = _get_cached_task(tid)
3232

3333
return tasks
3434

3535

3636
def _get_cached_task(tid):
37-
for cache_dir in [config.get_cache_directory(), config.get_private_directory()]:
38-
task_cache_dir = os.path.join(cache_dir, "tasks")
39-
task_file = os.path.join(task_cache_dir, str(tid), "task.xml")
40-
41-
try:
42-
with io.open(task_file, encoding='utf8') as fh:
43-
task = _create_task_from_xml(xml=fh.read())
44-
return task
45-
except (OSError, IOError):
46-
continue
37+
cache_dir = config.get_cache_directory()
38+
task_cache_dir = os.path.join(cache_dir, "tasks")
39+
task_file = os.path.join(task_cache_dir, str(tid), "task.xml")
4740

48-
raise OpenMLCacheException("Task file for tid %d not "
49-
"cached" % tid)
41+
try:
42+
with io.open(task_file, encoding='utf8') as fh:
43+
task = _create_task_from_xml(xml=fh.read())
44+
return task
45+
except (OSError, IOError):
46+
raise OpenMLCacheException("Task file for tid %d not "
47+
"cached" % tid)
5048

5149

5250
def _get_estimation_procedure_list():

openml/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def setUp(self):
4646
self.test_server = "https://test.openml.org/api/v1/xml"
4747
openml.config.server = self.test_server
4848

49-
openml.config.set_cache_directory(self.workdir, self.workdir)
49+
openml.config.set_cache_directory(self.workdir)
5050

5151
def tearDown(self):
5252
os.chdir(self.cwd)

tests/test_runs/test_run_functions.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ def test_run_optimize_bagging_iris(self):
5353
num_folds = 10
5454
num_iterations = 36 # (num values for C times gamma)
5555

56-
task = openml.tasks.get_task(task_id)
5756
bag = BaggingClassifier(base_estimator=SVC())
5857
param_dist = {"base_estimator__C": [0.001, 0.01, 0.1, 1, 10, 100],
5958
"base_estimator__gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
@@ -62,7 +61,6 @@ def test_run_optimize_bagging_iris(self):
6261
run = self._perform_run(task_id, num_instances, grid_search)
6362
self.assertEqual(len(run.trace_content), num_iterations * num_folds)
6463

65-
6664
def test__run_task_get_arffcontent(self):
6765
task = openml.tasks.get_task(1939)
6866
class_labels = task.class_labels

0 commit comments

Comments
 (0)