Skip to content

Commit 5cc1069

Browse files
committed
Merge pull request #104 from amueller/xml_refactoring
make "close to the metal" xml functions private
2 parents 20f0292 + cc6ff94 commit 5cc1069

4 files changed

Lines changed: 16 additions & 19 deletions

File tree

doc/api.rst

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@ Top-level Classes
2929
:template: function.rst
3030

3131
check_datasets_active
32-
get_dataset_description
33-
get_dataset_features
34-
get_dataset_qualities
3532
get_dataset
3633
get_datasets
3734
list_datasets

openml/datasets/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from .functions import (list_datasets, list_datasets_by_tag,
22
check_datasets_active, get_datasets, get_dataset,
3-
get_dataset_description,
4-
get_dataset_features, get_dataset_qualities)
3+
_get_dataset_description,
4+
_get_dataset_features, _get_dataset_qualities)
55
from .dataset import OpenMLDataset
66

77
__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
8-
'get_datasets_arf', 'get_dataset_features',
9-
'get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
8+
'get_datasets_arf', '_get_dataset_features',
9+
'_get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
1010
'list_datasets_by_tag',
11-
'get_dataset_description', 'list_datasets']
11+
'_get_dataset_description', 'list_datasets']

openml/datasets/functions.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,14 +236,14 @@ def get_dataset(did):
236236
raise ValueError("Dataset ID is neither an Integer nor can be "
237237
"cast to an Integer.")
238238

239-
description = get_dataset_description(did)
239+
description = _get_dataset_description(did)
240240
arff_file = _get_dataset_arff(did, description=description)
241241

242242
dataset = _create_dataset_from_description(description, arff_file)
243243
return dataset
244244

245245

246-
def get_dataset_description(did):
246+
def _get_dataset_description(did):
247247
# TODO implement a cache for this that invalidates itself after some
248248
# time
249249
# This can be saved on disk, but cannot be cached properly, because
@@ -295,7 +295,7 @@ def _get_dataset_arff(did, description=None):
295295
pass
296296

297297
if description is None:
298-
description = get_dataset_description(did)
298+
description = _get_dataset_description(did)
299299
url = description['oml:url']
300300
return_code, arff_string = _read_url(url)
301301
# TODO: it is inefficient to load the dataset in memory prior to
@@ -307,7 +307,7 @@ def _get_dataset_arff(did, description=None):
307307
return output_file
308308

309309

310-
def get_dataset_features(did):
310+
def _get_dataset_features(did):
311311
did_cache_dir = _create_dataset_cache_directory(did)
312312
features_file = os.path.join(did_cache_dir, "features.xml")
313313

@@ -337,7 +337,7 @@ def get_dataset_features(did):
337337
return features
338338

339339

340-
def get_dataset_qualities(did):
340+
def _get_dataset_qualities(did):
341341
# Dataset qualities are subject to change and must be fetched every time
342342
did_cache_dir = _create_dataset_cache_directory(did)
343343
qualities_file = os.path.join(did_cache_dir, "qualities.xml")

tests/test_datasets.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,20 +109,20 @@ def test_download_rowid(self):
109109
dataset = openml.datasets.get_dataset(did)
110110
self.assertEqual(dataset.row_id_attribute, 'instance')
111111

112-
def test_get_dataset_description(self):
112+
def test__get_dataset_description(self):
113113
# Only a smoke test, I don't know exactly how to test the URL
114114
# retrieval and "caching"
115-
description = openml.datasets.get_dataset_description(2)
115+
description = openml.datasets._get_dataset_description(2)
116116
self.assertIsInstance(description, dict)
117117

118-
def test_get_dataset_features(self):
118+
def test__get_dataset_features(self):
119119
# Only a smoke check
120-
features = openml.datasets.get_dataset_features(2)
120+
features = openml.datasets._get_dataset_features(2)
121121
self.assertIsInstance(features, dict)
122122

123-
def test_get_dataset_qualities(self):
123+
def test__get_dataset_qualities(self):
124124
# Only a smoke check
125-
qualities = openml.datasets.get_dataset_qualities(2)
125+
qualities = openml.datasets._get_dataset_qualities(2)
126126
self.assertIsInstance(qualities, dict)
127127

128128
def test_publish_dataset(self):

0 commit comments

Comments
 (0)