2323import xmltodict
2424
2525from .entities .dataset import OpenMLDataset
26- from .entities .task import Task
26+ from .entities .task import OpenMLTask
2727from .entities .split import OpenMLSplit
2828from .util import is_string
2929
3030logger = logging .getLogger (__name__ )
3131
32- OPENML_URL = "http://www .openml.org"
32+ OPENML_URL = "http://api_new .openml.org/v1/ "
3333
3434
3535class OpenMLStatusChange (Warning ):
@@ -52,10 +52,6 @@ def __init__(self, message):
5252 super (OpenMLServerError , self ).__init__ (message )
5353
5454
55- class AuthentificationError (PyOpenMLError ):
56- def __init__ (self , message ):
57- super (AuthentificationError , self ).__init__ (message )
58-
5955class OpenMLCacheException (PyOpenMLError ):
6056 def __init__ (self , message ):
6157 super (OpenMLCacheException , self ).__init__ (message )
@@ -79,14 +75,9 @@ class APIConnector(object):
7975 directory '.openml/cache' in the users home directory will be used.
8076 If either directory does not exist, it will be created.
8177
82- username : string, optional (default=None)
83- Your username.
84-
85- password : string, optional (default=None)
86- Your passwort. This will not be stored! Instead, the md5 hash is
87- calculated and used to authenticate to the OpenML server,
88- which returns a session key. This session key is the only credential
89- which is stored.
78+ apikey : string, optional (default=None)
79+ Your OpenML API key which will be used to authenticate you at the OpenML
80+ server.
9081
9182 server : string, optional (default=None)
9283 The OpenML server to connect to.
@@ -112,24 +103,18 @@ class APIConnector(object):
112103 Raises
113104 ------
114105 ValueError
115- If one of the following variables is neither specified in the
116- configuration file nor when creating the APIconnector class:
117- username, password
118- AuthentificationError
119- If authentification at the OpenML server does not work.
106+ If apikey is neither specified in the config nor given as an argument.
120107 OpenMLServerError
121108 If the OpenML server returns an unexpected response.
122109
123110 Testing the API calls in Firefox
124111 --------------------------------
125- With the Firefox AddOn HTTPRequestor, one can check the OpenML API calls
126- which need authentification (by providing such). First, create the md5
127- hash of your OpenML password and add it to
112+ With the Firefox AddOn HTTPRequestor, one can check the OpenML API calls.
128113
129114 """
130- def __init__ (self , cache_directory = None , username = None , password = None ,
115+ def __init__ (self , cache_directory = None , apikey = None ,
131116 server = None , verbosity = None , configure_logger = True ,
132- authenticate = True , private_directory = None ):
117+ private_directory = None ):
133118 # The .openml directory is necessary, just try to create it (EAFP)
134119 try :
135120 os .mkdir (os .path .expanduser ('~/.openml' ))
@@ -141,10 +126,8 @@ def __init__(self, cache_directory=None, username=None, password=None,
141126 self .config = self ._parse_config ()
142127 if cache_directory is not None :
143128 self .config .set ('FAKE_SECTION' , 'cachedir' , cache_directory )
144- if username is not None :
145- self .config .set ('FAKE_SECTION' , 'username' , username )
146- if password is not None :
147- self .config .set ('FAKE_SECTION' , 'password' , password )
129+ if apikey is not None :
130+ self .config .set ('FAKE_SECTION' , 'apikey' , apikey )
148131 if server is not None :
149132 self .config .set ('FAKE_SECTION' , 'server' , server )
150133 if verbosity is not None :
@@ -164,11 +147,6 @@ def __init__(self, cache_directory=None, username=None, password=None,
164147 format = '[%(levelname)s] [%(asctime)s:%(name)s] %('
165148 'message)s' , datefmt = '%H:%M:%S' , level = level )
166149
167- if authenticate :
168- self ._session_hash = self ._authenticate (
169- self .config .get ('FAKE_SECTION' , 'username' ),
170- self .config .get ('FAKE_SECTION' , 'password' ))
171-
172150 # Set up the cache directories
173151 self .cache_dir = self .config .get ('FAKE_SECTION' , 'cachedir' )
174152 self .dataset_cache_dir = os .path .join (self .cache_dir , "datasets" )
@@ -189,50 +167,8 @@ def __init__(self, cache_directory=None, username=None, password=None,
189167 if not os .path .exists (dir_ ) and not os .path .isdir (dir_ ):
190168 os .mkdir (dir_ )
191169
192- def _authenticate (self , username , password ):
193- # Check the username
194- if username is None :
195- raise ValueError ("No username specified." )
196- elif not is_string (username ):
197- raise ValueError ("Username must be of type string." )
198- elif not username :
199- raise ValueError ("No value for argument username specified." )
200-
201- # Check the password
202- if password is None :
203- raise ValueError ("No password specified." )
204- elif not is_string (password ):
205- raise ValueError ("Password must be of type string." )
206- elif not password :
207- raise ValueError ("No value for argument password specified." )
208-
209- m = hashlib .md5 ()
210- m .update (password .encode ('utf-8' ))
211- md5 = m .hexdigest ()
212-
213- # TODO: catch possible exceptions
214- data = {'username' : username , 'password' : md5 }
215- return_code , xml_string = self ._perform_api_call (
216- "openml.authenticate" , data = data , add_authentication = False )
217-
218- xml_dict = xmltodict .parse (xml_string )
219- if xml_dict .get ('oml:authenticate' ):
220- session_hash = xml_dict ['oml:authenticate' ]['oml:session_hash' ]
221- return session_hash
222- elif xml_dict .get ('oml:error' ):
223- error_code = xml_dict ['oml:error' ]['oml:code' ]
224- if error_code == '252' :
225- raise AuthentificationError (
226- "Authentication failed. The username and password did not "
227- "match any record in the database." )
228- else :
229- OpenMLServerError (
230- "Unexpected server response code %d with response"
231- " message %s" % (return_code , xml_string ))
232-
233170 def _parse_config (self ):
234- defaults = {'username' : '' ,
235- 'password' : '' ,
171+ defaults = {'apikey' : '' ,
236172 'server' : OPENML_URL ,
237173 'verbosity' : 0 ,
238174 'cachedir' : os .path .expanduser ('~/.openml/cache' ),
@@ -438,7 +374,7 @@ def get_dataset_list(self):
438374 these are also returned.
439375 """
440376 # TODO add proper error handling here!
441- return_code , xml_string = self ._perform_api_call ("openml. data" )
377+ return_code , xml_string = self ._perform_api_call ("data/list/ " )
442378 datasets_dict = xmltodict .parse (xml_string )
443379
444380 # Minimalistic check if the XML is useful
@@ -553,7 +489,7 @@ def download_dataset_description(self, did):
553489 except (OpenMLCacheException ):
554490 try :
555491 return_code , dataset_xml = self ._perform_api_call (
556- "openml. data.description" , data_id = did )
492+ "data/%d" % did )
557493 except (URLError , UnicodeEncodeError ) as e :
558494 # TODO logger.debug
559495 self ._remove_dataset_chache_dir (did )
@@ -568,7 +504,7 @@ def download_dataset_description(self, did):
568504 "oml:data_set_description" ]
569505 except Exception as e :
570506 # TODO logger.debug
571- self ._remove_dataset_chache_dir ()
507+ self ._remove_dataset_chache_dir (did )
572508 print ("Dataset ID" , did )
573509 raise e
574510
@@ -613,7 +549,7 @@ def download_dataset_features(self, did):
613549 except (OSError , IOError ):
614550 try :
615551 return_code , features_xml = self ._perform_api_call (
616- "openml. data. features" , data_id = did )
552+ "data/ features/%d" % did )
617553 except (URLError , UnicodeEncodeError ) as e :
618554 # TODO logger.debug
619555 print (e )
@@ -637,7 +573,7 @@ def download_dataset_qualities(self, did):
637573 qualities_file = os .path .join (did_cache_dir , "qualities.xml" )
638574 try :
639575 return_code , qualities_xml = self ._perform_api_call (
640- "openml. data. qualities" , data_id = did )
576+ "data/ qualities/%d" % did )
641577 except (URLError , UnicodeEncodeError ) as e :
642578 # TODO logger.debug
643579 print (e )
@@ -725,7 +661,7 @@ def get_task_list(self, task_type_id=1):
725661 "cast to an Integer." )
726662
727663 return_code , xml_string = self ._perform_api_call (
728- "openml.tasks" , task_type_id = task_type_id )
664+ "task/list/%d" % task_type_id )
729665 tasks_dict = xmltodict .parse (xml_string )
730666 # Minimalistic check if the XML is useful
731667 assert tasks_dict ['oml:tasks' ]['@xmlns:oml' ] == \
@@ -775,7 +711,7 @@ def download_task(self, task_id):
775711
776712 try :
777713 return_code , task_xml = self ._perform_api_call (
778- "openml. task.search" , task_id = task_id )
714+ "task/%d" % task_id )
779715 except (URLError , UnicodeEncodeError ) as e :
780716 print (e )
781717 raise e
@@ -819,7 +755,7 @@ def _create_task_from_xml(self, xml):
819755 text = parameter .get ("#text" , "" )
820756 estimation_parameters [name ] = text
821757
822- return Task (
758+ return OpenMLTask (
823759 dic ["oml:task_id" ], dic ["oml:task_type" ],
824760 inputs ["source_data" ]["oml:data_set" ]["oml:data_set_id" ],
825761 inputs ["source_data" ]["oml:data_set" ]["oml:target_feature" ],
@@ -878,21 +814,38 @@ def _create_task_cache_dir(self, task_id):
878814 return task_cache_dir
879815
880816 def _perform_api_call (self , call , data = None , filePath = None , add_authentication = True ,
881- ** kwargs ):
882- # TODO: do input validation!
883- url = self .config .get ("FAKE_SECTION" , "server" ) + "/api/?f="
884- url += "" + call
885- if kwargs :
886- for key in kwargs :
887- url += "&" + key + "=" + str (kwargs [key ])
888- # TODO logger.debug(url)
817+ """Perform an API call at the OpenML server.
889818 return self._read_url(url, data=data, filePath=filePath,
890- add_authentication = add_authentication )
891819
892820 def _read_url(self, url, add_authentication=False, data=None, filePath=None):
821+
822+ Parameters
823+ ----------
824+ call : str
825+ The API call. For example data/list
826+ data : dict (default=None)
827+ Dictionary containing data which will be sent to the OpenML
828+ server via a POST request.
829+ **kwargs
830+ Further arguments which are appended as GET arguments.
831+
832+ Returns
833+ -------
834+ return_code : int
835+ HTTP return code
836+ return_value : str
837+ Return value of the OpenML server
838+ """
839+ url = self .config .get ("FAKE_SECTION" , "server" )
840+ if not url .endswith ("/" ):
841+ url += "/"
842+ url += call
843+ return self ._read_url (url , data = data )
844+
845+ def _read_url (self , url , data = None ):
893846 if data is None :
894847 data = {}
895- if add_authentication :
848+ data [ 'session_hash' ] = self . config . get ( 'FAKE_SECTION' , 'apikey' )
896849 data ['session_hash' ] = self ._session_hash
897850
898851 if filePath is not None :
0 commit comments