@@ -110,7 +110,7 @@ def list_datasets():
110110
111111 Returns
112112 -------
113- list
113+ datasets : list of dicts
114114 A list of all datasets. Every dataset is represented by a
115115 dictionary containing the following information: dataset id,
116116 and status. If qualities are calculated for the dataset, some of
@@ -124,7 +124,7 @@ def list_datasets_by_tag(tag):
124124
125125 Returns
126126 -------
127- list
127+ datasets : list of dicts
128128 A list of all datasets having the given tag. Every dataset is
129129 represented by a dictionary containing the following information:
130130 dataset id, and status. If qualities are calculated for the dataset,
@@ -174,7 +174,7 @@ def check_datasets_active(dids):
174174 A list of integers representing dataset ids.
175175
176176 Returns
177177 -------
178- dict
178+ active : dict of int to boolean
179179 A dictionary with items {did: active}, where active is a boolean. It
180180 is set to True if the dataset is active.
@@ -202,7 +202,7 @@ def get_datasets(dids):
202202
203203 Returns
204204 -------
205- list
205+ datasets : list of datasets
206206 A list of dataset objects.
207207
208208 Notes
@@ -282,6 +282,24 @@ def _get_dataset_description(did):
282282
283283
284284def _get_dataset_arff (did , description = None ):
285+ """Load dataset arff (from cache or download).
286+
287+ Tries to load the arff file for ``did`` from the cache. If that fails,
288+ uses ``description`` (fetched if None) to download the arff file.
289+
290+ Parameters
291+ ----------
292+ did : int
293+ Dataset ID
294+
295+ description : dict, optional
296+ Dataset description dict. Fetched if None.
297+
298+ Returns
299+ -------
300+ output_filename : string
301+ Location of arff file.
302+ """
285303 did_cache_dir = _create_dataset_cache_directory (did )
286304 output_file = os .path .join (did_cache_dir , "dataset.arff" )
287305
@@ -308,6 +326,21 @@ def _get_dataset_arff(did, description=None):
308326
309327
310328def _get_dataset_features (did ):
329+ """API call to get dataset features (cached)
330+
331+ Features are feature descriptions for each column.
332+ (name, index, categorical, ...)
333+
334+ Parameters
335+ ----------
336+ did : int
337+ Dataset ID
338+
339+ Returns
340+ -------
341+ features : dict
342+ Dictionary containing dataset feature descriptions.
343+ """
311344 did_cache_dir = _create_dataset_cache_directory (did )
312345 features_file = os .path .join (did_cache_dir , "features.xml" )
313346
@@ -338,6 +371,20 @@ def _get_dataset_features(did):
338371
339372
340373def _get_dataset_qualities (did ):
374+ """API call to get dataset qualities (cached)
375+
376+ Qualities are metafeatures (number of features, number of classes, ...)
377+
378+ Parameters
379+ ----------
380+ did : int
381+ Dataset ID
382+
383+ Returns
384+ -------
385+ qualities : dict
386+ Dictionary containing dataset qualities.
387+ """
341388 # Dataset qualities are subject to change and must be fetched every time
342389 did_cache_dir = _create_dataset_cache_directory (did )
343390 qualities_file = os .path .join (did_cache_dir , "qualities.xml" )
@@ -362,6 +409,7 @@ def _get_dataset_qualities(did):
362409
363410
364411def _create_dataset_cache_directory (did ):
412+ """Create a dataset cache directory"""
365413 dataset_cache_dir = os .path .join (config .get_cache_directory (), "datasets" , str (did ))
366414 try :
367415 os .makedirs (dataset_cache_dir )
@@ -372,6 +420,7 @@ def _create_dataset_cache_directory(did):
372420
373421
374422def _remove_dataset_chache_dir (did ):
423+ """Remove the dataset cache directory"""
375424 dataset_cache_dir = os .path .join (config .get_cache_directory (), "datasets" , str (did ))
376425 try :
377426 os .rmdir (dataset_cache_dir )
@@ -381,6 +430,20 @@ def _remove_dataset_chache_dir(did):
381430
382431
383432def _create_dataset_from_description (description , arff_file ):
433+ """Create a dataset object from a description dict.
434+
435+ Parameters
436+ ----------
437+ description : dict
438+ Description of a dataset in xmlish dict.
439+ arff_file : string
440+ Path of dataset arff file.
441+
442+ Returns
443+ -------
444+ dataset : OpenMLDataset
445+ Dataset object built from the description dict and the arff file.
446+ """
384447 dataset = OpenMLDataset (
385448 description ["oml:id" ],
386449 description ["oml:name" ],
0 commit comments