88
99import numpy as np
1010import scipy .sparse
11+ import xmltodict
1112
1213if sys .version_info [0 ] >= 3 :
1314 import pickle
1718 except :
1819 import pickle
1920
21+
2022from ..util import is_string
2123from .._api_calls import _perform_api_call
2224
@@ -36,7 +38,7 @@ class OpenMLDataset(object):
3638 Description of the dataset
3739 FIXME : which of these do we actually need?
3840 """
39- def __init__ (self , id = None , name = None , version = None , description = None ,
41+ def __init__ (self , dataset_id = None , name = None , version = None , description = None ,
4042 format = None , creator = None , contributor = None ,
4143 collection_date = None , upload_date = None , language = None ,
4244 licence = None , url = None , default_target_attribute = None ,
@@ -45,7 +47,7 @@ def __init__(self, id=None, name=None, version=None, description=None,
4547 original_data_url = None , paper_url = None , update_comment = None ,
4648 md5_checksum = None , data_file = None ):
4749 # Attributes received by querying the RESTful API
48- self .id = int (id ) if id is not None else None
50+ self .dataset_id = int (dataset_id ) if dataset_id is not None else None
4951 self .name = name
5052 self .version = int (version )
5153 self .description = description
@@ -100,7 +102,7 @@ def __init__(self, id=None, name=None, version=None, description=None,
100102 with open (self .data_pickle_file , "wb" ) as fh :
101103 pickle .dump ((X , categorical , attribute_names ), fh , - 1 )
102104 logger .debug ("Saved dataset %d: %s to file %s" %
103- (self .id , self .name , self .data_pickle_file ))
105+ (self .dataset_id , self .name , self .data_pickle_file ))
104106
105107 def __eq__ (self , other ):
106108 if type (other ) != OpenMLDataset :
@@ -281,7 +283,8 @@ def publish(self):
281283 "/data/" , file_dictionary = file_dictionary ,
282284 file_elements = file_elements )
283285
284- return return_code , return_value
286+ self .dataset_id = int (xmltodict .parse (return_value )['oml:upload_data_set' ]['oml:id' ])
287+ return self
285288
286289 def _to_xml (self ):
287290 """Serialize object to xml for upload
@@ -292,7 +295,7 @@ def _to_xml(self):
292295 XML description of the data.
293296 """
294297 xml_dataset = ('<oml:data_set_description '
295- 'xmlns:oml="http://openml.org/openml">' )
298+ 'xmlns:oml="http://openml.org/openml">\n ' )
296299 props = ['id' , 'name' , 'version' , 'description' , 'format' , 'creator' ,
297300 'contributor' , 'collection_date' , 'upload_date' , 'language' ,
298301 'licence' , 'url' , 'default_target_attribute' ,
@@ -302,6 +305,6 @@ def _to_xml(self):
302305 for prop in props :
303306 content = getattr (self , prop , None )
304307 if content is not None :
305- xml_dataset += "<oml:{0}>{1}</oml:{0}>" .format (prop , content )
308+ xml_dataset += "<oml:{0}>{1}</oml:{0}>\n " .format (prop , content )
306309 xml_dataset += "</oml:data_set_description>"
307310 return xml_dataset
0 commit comments