1414from .dataset import OpenMLDataset
1515from ..exceptions import OpenMLCacheException , OpenMLServerException , \
1616 OpenMLHashException , PrivateDatasetError
17- from .. import config
18- from .._api_calls import _read_url
17+ from ..utils import (
18+ _create_cache_directory ,
19+ _remove_cache_dir_for_id ,
20+ _create_cache_directory_for_id ,
21+ _create_lockfiles_dir ,
22+ )
23+
24+
25+ DATASETS_CACHE_DIR_NAME = 'datasets'
26+
1927
2028
2129############################################################################
2230# Local getters/accessors to the cache directory
2331
32+
2433def _list_cached_datasets ():
2534 """Return list with ids of all cached datasets
2635
@@ -31,8 +40,7 @@ def _list_cached_datasets():
3140 """
3241 datasets = []
3342
34- dataset_cache = config .get_cache_directory ()
35- dataset_cache_dir = os .path .join (dataset_cache , "datasets" )
43+ dataset_cache_dir = _create_cache_directory (DATASETS_CACHE_DIR_NAME )
3644 directory_content = os .listdir (dataset_cache_dir )
3745 directory_content .sort ()
3846
@@ -88,8 +96,9 @@ def _get_cached_dataset(dataset_id):
8896
8997
9098def _get_cached_dataset_description (dataset_id ):
91- cache_dir = config .get_cache_directory ()
92- did_cache_dir = os .path .join (cache_dir , "datasets" , str (dataset_id ))
99+ did_cache_dir = _create_cache_directory_for_id (
100+ DATASETS_CACHE_DIR_NAME , dataset_id ,
101+ )
93102 description_file = os .path .join (did_cache_dir , "description.xml" )
94103 try :
95104 with io .open (description_file , encoding = 'utf8' ) as fh :
@@ -102,8 +111,9 @@ def _get_cached_dataset_description(dataset_id):
102111
103112
104113def _get_cached_dataset_features (dataset_id ):
105- cache_dir = config .get_cache_directory ()
106- did_cache_dir = os .path .join (cache_dir , "datasets" , str (dataset_id ))
114+ did_cache_dir = _create_cache_directory_for_id (
115+ DATASETS_CACHE_DIR_NAME , dataset_id ,
116+ )
107117 features_file = os .path .join (did_cache_dir , "features.xml" )
108118 try :
109119 with io .open (features_file , encoding = 'utf8' ) as fh :
@@ -115,8 +125,9 @@ def _get_cached_dataset_features(dataset_id):
115125
116126
117127def _get_cached_dataset_qualities (dataset_id ):
118- cache_dir = config .get_cache_directory ()
119- did_cache_dir = os .path .join (cache_dir , "datasets" , str (dataset_id ))
128+ did_cache_dir = _create_cache_directory_for_id (
129+ DATASETS_CACHE_DIR_NAME , dataset_id ,
130+ )
120131 qualities_file = os .path .join (did_cache_dir , "qualities.xml" )
121132 try :
122133 with io .open (qualities_file , encoding = 'utf8' ) as fh :
@@ -128,8 +139,9 @@ def _get_cached_dataset_qualities(dataset_id):
128139
129140
130141def _get_cached_dataset_arff (dataset_id ):
131- cache_dir = config .get_cache_directory ()
132- did_cache_dir = os .path .join (cache_dir , "datasets" , str (dataset_id ))
142+ did_cache_dir = _create_cache_directory_for_id (
143+ DATASETS_CACHE_DIR_NAME , dataset_id ,
144+ )
133145 output_file = os .path .join (did_cache_dir , "dataset.arff" )
134146
135147 try :
@@ -311,9 +323,11 @@ def get_dataset(dataset_id):
311323
312324 with lockutils .external_lock (
313325 name = 'datasets.functions.get_dataset:%d' % dataset_id ,
314- lock_path = os . path . join ( config . get_cache_directory (), 'locks' ),
326+ lock_path = _create_lockfiles_dir ( ),
315327 ):
316- did_cache_dir = _create_dataset_cache_directory (dataset_id )
328+ did_cache_dir = _create_cache_directory_for_id (
329+ DATASETS_CACHE_DIR_NAME , dataset_id ,
330+ )
317331
318332 try :
319333 remove_dataset_cache = True
@@ -330,7 +344,7 @@ def get_dataset(dataset_id):
330344 raise e
331345 finally :
332346 if remove_dataset_cache :
333- _remove_dataset_cache_dir ( did_cache_dir )
347+ _remove_cache_dir_for_id ( DATASETS_CACHE_DIR_NAME , did_cache_dir )
334348
335349 dataset = _create_dataset_from_description (
336350 description , features , qualities , arff_file
@@ -412,7 +426,7 @@ def _get_dataset_arff(did_cache_dir, description):
412426 pass
413427
414428 url = description ['oml:url' ]
415- arff_string = _read_url (url )
429+ arff_string = openml . _api_calls . _read_url (url )
416430 md5 = hashlib .md5 ()
417431 md5 .update (arff_string .encode ('utf-8' ))
418432 md5_checksum = md5 .hexdigest ()
@@ -505,55 +519,6 @@ def _get_dataset_qualities(did_cache_dir, dataset_id):
505519 return qualities
506520
507521
508- def _create_dataset_cache_directory (dataset_id ):
509- """Create a dataset cache directory
510-
511- In order to have a clearer cache structure and because every dataset
512- is cached in several files (description, arff, features, qualities), there
512- is a directory for each dataset with the dataset ID being the directory
514- name. This function creates this cache directory.
515-
516- This function is NOT thread/multiprocessing safe.
517-
518- Parameters
519- ----------
520- did : int
521- Dataset ID
522-
523- Returns
524- -------
525- str
526- Path of the created dataset cache directory.
527- """
528- dataset_cache_dir = os .path .join (
529- config .get_cache_directory (),
530- "datasets" ,
531- str (dataset_id ),
532- )
533- if os .path .exists (dataset_cache_dir ) and os .path .isdir (dataset_cache_dir ):
534- pass
535- elif os .path .exists (dataset_cache_dir ) and not os .path .isdir (dataset_cache_dir ):
536- raise ValueError ('Dataset cache dir exists but is not a directory!' )
537- else :
538- os .makedirs (dataset_cache_dir )
539- return dataset_cache_dir
540-
541-
542- def _remove_dataset_cache_dir (did_cache_dir ):
543- """Remove the dataset cache directory
544-
545- This function is NOT thread/multiprocessing safe.
546-
547- Parameters
548- ----------
549- """
550- try :
551- shutil .rmtree (did_cache_dir )
552- except (OSError , IOError ):
553- raise ValueError ('Cannot remove faulty dataset cache directory %s.'
554- 'Please do this manually!' % did_cache_dir )
555-
556-
557522def _create_dataset_from_description (description , features , qualities , arff_file ):
558523 """Create a dataset object from a description dict.
559524
0 commit comments