2222
2323
2424class OpenMLDataset (object ):
25-
25+ """Dataset object.
26+
27+ Allows fetching and uploading datasets to OpenML.
28+
29+ Parameters
30+ ----------
31+ name : string
32+ Name of the dataset
33+ description : string
34+ Description of the dataset
35+ FIXME : which of these do we actually need?
36+ """
2637 def __init__ (self , id = None , name = None , version = None , description = None ,
2738 format = None , creator = None , contributor = None ,
2839 collection_date = None , upload_date = None , language = None ,
@@ -63,7 +74,7 @@ def __init__(self, id=None, name=None, version=None, description=None,
6374 logger .debug ("Data pickle file already exists." )
6475 else :
6576 try :
66- data = self .get_arff ()
77+ data = self ._get_arff ()
6778 except OSError as e :
6879 logger .critical ("Please check that the data file %s is there "
6980 "and can be read." , self .data_file )
@@ -98,9 +109,7 @@ def __eq__(self, other):
98109 else :
99110 return False
100111
101- ##########################################################################
102- # ARFF related stuff
103- def get_arff (self ):
112+ def _get_arff (self ):
104113 # TODO: add a partial read method which only returns the attribute
105114 # headers of the corresponding .arff file!
106115
@@ -124,11 +133,20 @@ def decode_arff(fh):
124133 with open (filename ) as fh :
125134 return decode_arff (fh )
126135
127- ##########################################################################
128- def get_dataset (self , target = None , target_dtype = int , include_row_id = False ,
129- include_ignore_attributes = False ,
130- return_categorical_indicator = False ,
131- return_attribute_names = False ):
136+ def get_data (self , target = None , target_dtype = int , include_row_id = False ,
137+ include_ignore_attributes = False ,
138+ return_categorical_indicator = False ,
139+ return_attribute_names = False ):
140+ """Returns dataset content as numpy arrays / sparse matrices.
141+
142+ Parameters
143+ ----------
144+
145+
146+ Returns
147+ -------
148+
149+ """
132150 rval = []
133151
134152 path = self .data_pickle_file
@@ -224,6 +242,13 @@ def retrieve_class_labels(self):
224242 return None
225243
226244 def publish (self ):
245+ """Publish the dataset on the OpenML server.
246+
247+ Upload the dataset description and dataset content to OpenML.
248+
249+ Returns
250+ -------
251+ """
227252 data = {'description' : self .to_xml ()}
228253 if self .data_file is not None :
229254 return_code , return_value = _perform_api_call (
0 commit comments