11from collections import OrderedDict
2+ import hashlib
23import logging
34import os
45import re
@@ -816,154 +817,9 @@ def _create_task_cache_dir(self, task_id):
816817 pass
817818 return task_cache_dir
818819
819- ############################################################################
820- # Runs
821- def get_runs_list (self , task_id = None , flow_id = None , setup_id = None ):
822- """Return a list of all runs for either a task, flow or setup.
823-
824- Exactly one of the optional parameters must be given.
825-
826- Parameters
827- ----------
828- task_id : int, optional
829- flow_id : int, optional
830- setup_id : int, optional
831-
832- Returns
833- -------
834- list
835- A list of all runs run IDs for a given ID.
820+ def _perform_api_call (self , call , data = None , file_dictionary = None , add_authentication = True ):
836821 """
837- test = [task_id is None , flow_id is None , setup_id is None ]
838- if np .nansum (test ) != 2 :
839- raise ValueError
840-
841- call = "run/list"
842-
843- if task_id is not None :
844- call += "?task_id=%d" % task_id
845- elif flow_id is not None :
846- call += "?implementation_id=%d" % flow_id
847- elif setup_id is not None :
848- call += "?setup_id=%d" % setup_id
849-
850- return_code , xml_string = self ._perform_api_call (call )
851- datasets_dict = xmltodict .parse (xml_string )
852-
853-
854- if isinstance (datasets_dict ['oml:runs' ]['oml:run' ], dict ):
855- runs = [datasets_dict ['oml:runs' ]['oml:run' ]]
856- else :
857- # Minimalistic check if the XML is useful
858- assert type (datasets_dict ['oml:runs' ]['oml:run' ]) == list , \
859- type (datasets_dict ['oml:runs' ]['oml:run' ])
860- assert datasets_dict ['oml:runs' ]['@xmlns:oml' ] == \
861- 'http://openml.org/openml'
862-
863- runs = []
864- for runs_ in datasets_dict ['oml:runs' ]['oml:run' ]:
865- run = {'run_id' : int (runs_ ['oml:run_id' ]),
866- 'task_id' : int (runs_ ['oml:task_id' ]),
867- 'setup_id' : int (runs_ ['oml:setup_id' ]),
868- 'implementation_id' : int (runs_ ['oml:implementation_id' ]),
869- 'uploader' : int (runs_ ['oml:uploader' ])}
870-
871- runs .append (run )
872- runs .sort (key = lambda t : t ['run_id' ])
873-
874- return runs
875-
def download_run(self, run_id):
    """Return the OpenML run for a given run ID, using the local cache.

    The run description is first looked up as ``run.xml`` inside the
    directory returned by ``self._create_run_cache_dir``. If it cannot be
    read from there it is fetched through ``self._perform_api_call`` and
    written to that file.

    Parameters
    ----------
    run_id : int
        The OpenML run id. Any value accepted by ``int()`` is allowed.

    Returns
    -------
    The object produced by ``self._create_run_from_xml`` for the run XML.

    Raises
    ------
    ValueError
        If ``run_id`` cannot be cast to an integer, or if a cached
        description exists but differs from the one the server returned.
    """
    try:
        run_id = int(run_id)
    except (TypeError, ValueError):
        # Only conversion failures are translated; anything else (e.g.
        # KeyboardInterrupt) must not be masked as a bad ID.
        raise ValueError("Run ID is neither an Integer nor can be "
                         "cast to an Integer.")

    xml_file = os.path.join(self._create_run_cache_dir(run_id), "run.xml")

    try:
        with open(xml_file) as fh:
            run = self._create_run_from_xml(fh.read())
    except (OSError, IOError):
        # Nothing usable in the cache: ask the server.
        try:
            return_code, run_xml = self._perform_api_call(
                "run/%d" % run_id)
        except (URLError, UnicodeEncodeError) as e:
            print(e)
            # Bare raise keeps the original traceback.
            raise

        if os.path.exists(xml_file):
            # A file appeared (or exists but could not be parsed above);
            # refuse to silently overwrite a diverging description.
            with open(xml_file) as fh:
                local_xml = fh.read()

            if run_xml != local_xml:
                raise ValueError("Run description of run %d cached at %s "
                                 "has changed." % (run_id, xml_file))
        else:
            # Cache the run description for subsequent calls.
            with open(xml_file, "w") as fh:
                fh.write(run_xml)

        run = self._create_run_from_xml(run_xml)

    return run
921-
def _create_run_cache_dir(self, run_id):
    """Create (if necessary) and return the cache directory of a run.

    The directory is ``<self.task_cache_dir>/<run_id>``.

    Parameters
    ----------
    run_id : int
        The OpenML run id; it is converted with ``str()`` to form the
        directory name.

    Returns
    -------
    str
        Path of the run cache directory.

    Raises
    ------
    OSError
        If the directory does not exist and could not be created.
    """
    run_cache_dir = os.path.join(self.task_cache_dir, str(run_id))

    try:
        os.makedirs(run_cache_dir)
    except (IOError, OSError):
        # An already existing directory is the expected case on every call
        # after the first one. Any other failure would otherwise surface
        # later as a confusing error on a path that does not exist.
        if not os.path.isdir(run_cache_dir):
            raise
    return run_cache_dir
931-
def _create_run_from_xml(self, xml):
    """Build an ``OpenMLRun`` from the XML description of a run.

    Parameters
    ----------
    xml : str
        XML document whose root element is ``oml:run``.

    Returns
    -------
    OpenMLRun
        Constructed from the run id, uploader, task id, implementation id,
        setup string, setup id, tags, dataset ids, output files
        (name -> url) and evaluations (name -> (value, array_data)).
    """
    def as_list(node):
        # xmltodict yields a list only when an element is repeated; a
        # single occurrence comes back as a bare dict/string and a missing
        # one as None. Normalise so the loops below always see items.
        if node is None:
            return []
        if isinstance(node, list):
            return node
        return [node]

    dic = xmltodict.parse(xml)[u"oml:run"]

    datasets = []
    input_data = dic[u'oml:input_data']
    for key in input_data:
        for dataset in as_list(input_data[key]):
            datasets.append(dataset[u'oml:did'])

    # Without normalisation a single tag would be iterated character by
    # character.
    tags = list(as_list(dic.get(u"oml:tag")))

    output_data = dic[u"oml:output_data"]

    files = dict()
    for file_ in as_list(output_data.get(u"oml:file")):
        files[file_[u"oml:name"]] = file_[u"oml:url"]

    evaluations = dict()
    for evaluation in as_list(output_data.get(u"oml:evaluation")):
        name = evaluation[u"oml:name"]
        value = evaluation.get(u"oml:value")
        value_array = evaluation.get(u"oml:array_data")
        evaluations[name] = (value, value_array)

    return OpenMLRun(
        dic[u"oml:run_id"], dic[u"oml:uploader"],
        dic[u"oml:task_id"], dic[u"oml:implementation_id"],
        dic[u"oml:setup_string"], dic[u'oml:setup_id'],
        tags, datasets, files, evaluations)
962-
963- ############################################################################
964- # Internal stuff
965- def _perform_api_call (self , call , data = None , file_path = None ):
966- """Perform an API call at the OpenML server.
822+ Perform an API call at the OpenML server.
967823 return self._read_url(url, data=data, filePath=filePath,
968824 def _read_url(self, url, add_authentication=False, data=None, filePath=None):
969825
@@ -988,33 +844,35 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
988844 if not url .endswith ("/" ):
989845 url += "/"
990846 url += call
991- return self ._read_url (url , data = data , file_path = file_path )
847+ return self ._read_url (url , data = data , file_dictionary = file_dictionary )
992848
993- def _read_url (self , url , data = None , file_path = None ):
849+ def _read_url (self , url , data = None , file_dictionary = None ):
994850 if data is None :
995851 data = {}
996852 data ['api_key' ] = self .config .get ('FAKE_SECTION' , 'apikey' )
997853
998- if file_path is not None :
999- if os .path .isabs (file_path ):
1000- try :
1001- decoder = arff .ArffDecoder ()
1002- except :
1003- raise "The file you provided is not a valid arff file"
1004-
1005- fileElement = {'dataset' : open (file_path , 'rb' )}
1006- data ['description' ]= data .get ('description' )
1007- data .pop ('dataset' , None )
854+ if file_dictionary is not None :
855+ file_elements = {}
856+ for key , path in file_dictionary .items ():
857+ if os .path .isabs (path ) and os .path .exists (path ):
858+ try :
859+ if key is 'dataset' :
860+ decoder = arff .ArffDecoder ()
861+ with open (path ) as fh :
862+ decoder .decode (fh , encode_nominal = True )
863+ except :
864+ raise ValueError ("The file you have provided is not a valid arff file" )
1008865
1009- try :
1010- response = requests .post (url , data = data , files = fileElement )
1011- except URLError as error :
1012- print (error )
866+ file_elements [key ] = open (path , 'rb' )
1013867
868+ else :
869+ raise ValueError ("File doesn't exist" )
870+ try :
871+ response = requests .post (url , data = data , files = file_elements )
1014872 return response .status_code , response
1015- else :
1016- raise "File doesn't exists"
1017873
874+ except URLError as error :
875+ print (error )
1018876 else :
1019877 data = urlencode (data )
1020878 data = data .encode ('utf-8' )
@@ -1059,38 +917,41 @@ def _read_url(self, url, data=None, file_path=None):
def upload_dataset(self, description, file_path=None):
    """Upload a dataset description, optionally with a data file.

    Parameters
    ----------
    description : str
        Dataset description, sent as the ``description`` form field.
    file_path : str, optional
        Path of the dataset file. When given it is passed on as
        ``file_dictionary={'dataset': file_path}``.

    Returns
    -------
    tuple
        ``(return_code, dataset_xml)`` as returned by
        ``self._perform_api_call``.

    Raises
    ------
    URLError
        Re-raised unchanged after being printed.
    """
    data = {'description': description}
    # Only announce a file upload when a path was actually supplied.
    file_dictionary = None
    if file_path is not None:
        file_dictionary = {'dataset': file_path}

    try:
        return_code, dataset_xml = self._perform_api_call(
            "/data/", data=data, file_dictionary=file_dictionary)
    except URLError as e:
        # TODO logger.debug
        print(e)
        raise
    return return_code, dataset_xml
1070930
def upload_flow(self, description, file_path=None):
    """Upload a flow description, optionally with a source file.

    Parameters
    ----------
    description : str
        Flow description, sent as the ``description`` form field.
    file_path : str, optional
        Path of the flow's source file. When given it is passed on as
        ``file_dictionary={'source': file_path}``.

    Returns
    -------
    tuple
        ``(return_code, dataset_xml)`` as returned by
        ``self._perform_api_call``.

    Raises
    ------
    URLError
        Re-raised unchanged after being printed.
    """
    data = {'description': description}
    # With the default ``file_path=None`` no file entry must be sent:
    # a ``{'source': None}`` entry would reach ``os.path.isabs(None)``
    # in the upload path and fail there.
    file_dictionary = None
    if file_path is not None:
        file_dictionary = {'source': file_path}

    try:
        return_code, dataset_xml = self._perform_api_call(
            "/flow/", data=data, file_dictionary=file_dictionary)
    except URLError as e:
        # TODO logger.debug
        print(e)
        raise
    return return_code, dataset_xml
1082941
def upload_run(self, files):
    """Upload the files of a run.

    Parameters
    ----------
    files : dict
        Mapping of form-field name to file path. It must contain the key
        ``'predictions'``.

    Returns
    -------
    tuple
        ``(return_code, dataset_xml)`` as returned by
        ``self._perform_api_call``.

    Raises
    ------
    ValueError
        If ``files`` has no ``'predictions'`` entry.
    URLError
        Re-raised unchanged after being printed.
    """
    if 'predictions' not in files:
        raise ValueError("prediction files doesn't exist")

    # Hand over a copy so the caller's mapping is never shared with the
    # upload machinery.
    file_dictionary = dict(files)

    try:
        return_code, dataset_xml = self._perform_api_call(
            "/run/", file_dictionary=file_dictionary)
    except URLError as e:
        # TODO logger.debug
        print(e)
        raise
    return return_code, dataset_xml
0 commit comments