@@ -23,28 +23,28 @@ def _list_cached_datasets():
2323 """
2424 datasets = []
2525
26- for dataset_cache in [ config .get_cache_directory (), config . get_private_directory ()]:
27- dataset_cache_dir = os .path .join (dataset_cache , "datasets" )
28- directory_content = os .listdir (dataset_cache_dir )
29- directory_content .sort ()
30-
31- # Find all dataset ids for which we have downloaded the dataset
32- # description
33- for directory_name in directory_content :
34- # First check if the directory name could be an OpenML dataset id
35- if not re .match (r"[0-9]*" , directory_name ):
36- continue
26+ dataset_cache = config .get_cache_directory ()
27+ dataset_cache_dir = os .path .join (dataset_cache , "datasets" )
28+ directory_content = os .listdir (dataset_cache_dir )
29+ directory_content .sort ()
30+
31+ # Find all dataset ids for which we have downloaded the dataset
32+ # description
33+ for directory_name in directory_content :
34+ # First check if the directory name could be an OpenML dataset id
35+ if not re .match (r"[0-9]*" , directory_name ):
36+ continue
3737
38- dataset_id = int (directory_name )
38+ dataset_id = int (directory_name )
3939
40- directory_name = os .path .join (dataset_cache_dir ,
41- directory_name )
42- dataset_directory_content = os .listdir (directory_name )
40+ directory_name = os .path .join (dataset_cache_dir ,
41+ directory_name )
42+ dataset_directory_content = os .listdir (directory_name )
4343
44- if "dataset.arff" in dataset_directory_content and \
45- "description.xml" in dataset_directory_content :
46- if dataset_id not in datasets :
47- datasets .append (dataset_id )
44+ if "dataset.arff" in dataset_directory_content and \
45+ "description.xml" in dataset_directory_content :
46+ if dataset_id not in datasets :
47+ datasets .append (dataset_id )
4848
4949 datasets .sort ()
5050 return datasets
@@ -79,53 +79,44 @@ def _get_cached_dataset(dataset_id):
7979
8080
def _get_cached_dataset_description(dataset_id):
    """Return the cached dataset description for ``dataset_id``.

    Reads ``description.xml`` from the dataset's cache directory
    (``<cache>/datasets/<dataset_id>/description.xml``) and parses it.

    Parameters
    ----------
    dataset_id : int
        OpenML dataset id.

    Returns
    -------
    dict
        The parsed ``oml:data_set_description`` element.

    Raises
    ------
    OpenMLCacheException
        If the description file is not present in the cache.
    """
    cache_dir = config.get_cache_directory()
    did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
    description_file = os.path.join(did_cache_dir, "description.xml")
    try:
        with io.open(description_file, encoding='utf8') as fh:
            dataset_xml = fh.read()
    except (IOError, OSError):
        raise OpenMLCacheException(
            "Dataset description for dataset id %d not "
            "cached" % dataset_id)
    # Parse outside the try block so that any error raised while parsing
    # is not misreported as a cache miss.
    return xmltodict.parse(dataset_xml)["oml:data_set_description"]
def _get_cached_dataset_features(dataset_id):
    """Return the cached dataset features for ``dataset_id``.

    Reads ``features.xml`` from the dataset's cache directory
    (``<cache>/datasets/<dataset_id>/features.xml``) and parses it.

    Parameters
    ----------
    dataset_id : int
        OpenML dataset id.

    Returns
    -------
    dict
        The parsed ``oml:data_features`` element.

    Raises
    ------
    OpenMLCacheException
        If the features file is not present in the cache.
    """
    cache_dir = config.get_cache_directory()
    did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
    features_file = os.path.join(did_cache_dir, "features.xml")
    try:
        with io.open(features_file, encoding='utf8') as fh:
            features_xml = fh.read()
    except (IOError, OSError):
        raise OpenMLCacheException("Dataset features for dataset id %d not "
                                   "cached" % dataset_id)
    # Parse outside the try block so that any error raised while parsing
    # is not misreported as a cache miss.
    return xmltodict.parse(features_xml)["oml:data_features"]
113107
def _get_cached_dataset_arff(dataset_id):
    """Return the path of the cached ARFF file for ``dataset_id``.

    The file is opened once to verify that it exists and is readable;
    only the path is returned, not the contents.

    Parameters
    ----------
    dataset_id : int
        OpenML dataset id.

    Returns
    -------
    str
        Path to ``dataset.arff`` inside the dataset's cache directory.

    Raises
    ------
    OpenMLCacheException
        If the ARFF file is not present in the cache.
    """
    arff_path = os.path.join(
        config.get_cache_directory(), "datasets", str(dataset_id),
        "dataset.arff")

    try:
        with io.open(arff_path, encoding='utf8'):
            pass
    except (OSError, IOError):
        raise OpenMLCacheException("ARFF file for dataset id %d not "
                                   "cached" % dataset_id)
    return arff_path
130121
131122def list_datasets (offset = None , size = None , tag = None ):
0 commit comments