11from collections import OrderedDict
2- import hashlib
32import logging
43import os
54import re
2524from .entities .dataset import OpenMLDataset
2625from .entities .task import OpenMLTask
2726from .entities .split import OpenMLSplit
27+ from .entities .run import OpenMLRun
2828from .util import is_string
2929
30+ import numpy as np
31+
3032logger = logging .getLogger (__name__ )
3133
3234OPENML_URL = "http://api_new.openml.org/v1/"
@@ -806,16 +808,162 @@ def _download_split(self, task, cache_file):
806808
807809 def _create_task_cache_dir (self , task_id ):
808810 task_cache_dir = os .path .join (self .task_cache_dir , str (task_id ))
811+
809812 try :
810813 os .makedirs (task_cache_dir )
811814 except (IOError , OSError ):
812815 # TODO add debug information!
813816 pass
814817 return task_cache_dir
815818
816- def _perform_api_call (self , call , data = None , filePath = None , add_authentication = True ):
819+ ############################################################################
820+ # Runs
def get_runs_list(self, task_id=None, flow_id=None, setup_id=None):
    """Return a list of all runs for either a task, flow or setup.

    Exactly one of the optional parameters must be given.

    Parameters
    ----------
    task_id : int, optional
    flow_id : int, optional
    setup_id : int, optional

    Returns
    -------
    list
        One dict per run with the integer entries ``run_id``, ``task_id``,
        ``setup_id``, ``implementation_id`` and ``uploader``. The list is
        sorted by ``run_id``.

    Raises
    ------
    ValueError
        If not exactly one of the IDs is given, or if the parsed server
        response does not have the expected structure.
    """
    # Name of the query parameter for each filter; the flow ID is sent as
    # "implementation_id".
    filters = (("task_id", task_id),
               ("implementation_id", flow_id),
               ("setup_id", setup_id))
    given = [(name, value) for name, value in filters if value is not None]
    if len(given) != 1:
        raise ValueError("Exactly one of task_id, flow_id and setup_id "
                         "must be given, got %d." % len(given))

    name, value = given[0]
    call = "run/list?%s=%d" % (name, value)

    return_code, xml_string = self._perform_api_call(call)
    runs_dict = xmltodict.parse(xml_string)['oml:runs']

    # Minimalistic check if the XML is useful
    if runs_dict['@xmlns:oml'] != 'http://openml.org/openml':
        raise ValueError("Unexpected XML namespace in run list: %s"
                         % runs_dict['@xmlns:oml'])

    # xmltodict returns a single dict instead of a list if the response
    # contains only one run; treat both cases the same way.
    run_entries = runs_dict['oml:run']
    if isinstance(run_entries, dict):
        run_entries = [run_entries]
    elif not isinstance(run_entries, list):
        raise ValueError("Unexpected type of run list: %s"
                         % type(run_entries))

    runs = [{'run_id': int(entry['oml:run_id']),
             'task_id': int(entry['oml:task_id']),
             'setup_id': int(entry['oml:setup_id']),
             'implementation_id': int(entry['oml:implementation_id']),
             'uploader': int(entry['oml:uploader'])}
            for entry in run_entries]
    runs.sort(key=lambda run: run['run_id'])

    return runs
875+
def download_run(self, run_id):
    """Download the OpenML run for a given run ID.

    The run description is read from ``run.xml`` in the cache directory of
    the run if that file can be read. Otherwise it is requested from the
    server via the ``run/<run_id>`` call and written to that file.

    Parameters
    ----------
    run_id : int
        The OpenML run id.

    Returns
    -------
    The object created by ``self._create_run_from_xml`` from the run XML.

    Raises
    ------
    ValueError
        If ``run_id`` cannot be cast to an integer, or if a cached
        description exists that differs from the downloaded one.
    """
    try:
        run_id = int(run_id)
    except (TypeError, ValueError, OverflowError):
        raise ValueError("Run ID is neither an Integer nor can be "
                         "cast to an Integer.")

    xml_file = os.path.join(self._create_run_cache_dir(run_id),
                            "run.xml")

    # Only reading the cache is guarded; errors while parsing the XML
    # should not be mistaken for a cache miss.
    try:
        with open(xml_file) as fh:
            run_xml = fh.read()
    except (OSError, IOError):
        run_xml = None

    if run_xml is None:
        try:
            return_code, run_xml = self._perform_api_call(
                "run/%d" % run_id)
        except (URLError, UnicodeEncodeError) as e:
            logging.getLogger(__name__).error(
                "Could not download run %d: %s", run_id, e)
            # Bare raise keeps the original traceback.
            raise

        # Cache the xml run file
        if os.path.exists(xml_file):
            # The file exists although it could not be read above (e.g. it
            # was created in the meantime); make sure it is consistent.
            with open(xml_file) as fh:
                local_xml = fh.read()

            if run_xml != local_xml:
                raise ValueError("Run description of run %d cached at %s "
                                 "has changed." % (run_id, xml_file))
        else:
            with open(xml_file, "w") as fh:
                fh.write(run_xml)

    return self._create_run_from_xml(run_xml)
921+
922+ def _create_run_cache_dir (self , run_id ):
923+ run_cache_dir = os .path .join (self .task_cache_dir , str (run_id ))
924+
925+ try :
926+ os .makedirs (run_cache_dir )
927+ except (IOError , OSError ):
928+ # TODO add debug information!
929+ pass
930+ return run_cache_dir
931+
def _create_run_from_xml(self, xml):
    """Create an ``OpenMLRun`` from the XML description of a run.

    Parameters
    ----------
    xml : str
        XML document with an ``oml:run`` root element.

    Returns
    -------
    OpenMLRun
        Built from the run ID, uploader, task ID, implementation ID, setup
        string and setup ID found in the XML, plus the collected tags,
        dataset IDs, output files (name -> url) and evaluations
        (name -> (value, array_data)).
    """
    def as_list(value):
        # xmltodict yields the bare item instead of a one-element list if
        # an element occurs only once, and None/no key if it is absent or
        # empty. Normalise all of these to a list.
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    dic = xmltodict.parse(xml)[u"oml:run"]

    datasets = []
    input_data = dic.get(u'oml:input_data') or {}
    for key in input_data:
        for dataset in as_list(input_data[key]):
            datasets.append(dataset[u'oml:did'])

    # Copy so the returned list is independent of the parsed document.
    tags = list(as_list(dic.get(u"oml:tag")))

    output_data = dic.get(u"oml:output_data") or {}

    files = dict()
    for file_ in as_list(output_data.get(u"oml:file")):
        files[file_[u"oml:name"]] = file_[u"oml:url"]

    evaluations = dict()
    for evaluation in as_list(output_data.get(u"oml:evaluation")):
        name = evaluation[u"oml:name"]
        value = evaluation.get(u"oml:value")
        value_array = evaluation.get(u"oml:array_data")
        evaluations[name] = (value, value_array)

    return OpenMLRun(
        dic[u"oml:run_id"], dic[u"oml:uploader"],
        dic[u"oml:task_id"], dic[u"oml:implementation_id"],
        dic[u"oml:setup_string"], dic[u'oml:setup_id'],
        tags, datasets, files, evaluations)
962+
963+ ############################################################################
964+ # Internal stuff
965+ def _perform_api_call (self , call , data = None , file_path = None ):
966+ """Perform an API call at the OpenML server.
819967 return self._read_url(url, data=data, filePath=filePath,
820968 def _read_url(self, url, add_authentication=False, data=None, filePath=None):
821969
@@ -840,32 +988,33 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
840988 if not url .endswith ("/" ):
841989 url += "/"
842990 url += call
843- return self ._read_url (url , data = data ,filePath = filePath )
991+ return self ._read_url (url , data = data , file_path = file_path )
844992
845- def _read_url (self , url , data = None , filePath = None ):
993+ def _read_url (self , url , data = None , file_path = None ):
846994 if data is None :
847995 data = {}
848- data ['session_hash ' ] = self .config .get ('FAKE_SECTION' , 'apikey' )
996+ data ['api_key ' ] = self .config .get ('FAKE_SECTION' , 'apikey' )
849997
850- if filePath is not None :
851- if os .path .isabs (filePath ):
998+ if file_path is not None :
999+ if os .path .isabs (file_path ):
8521000 try :
8531001 decoder = arff .ArffDecoder ()
8541002 except :
8551003 raise "The file you provided is not a valid arff file"
8561004
857- fileElement = {'dataset' : open (filePath , 'rb' )}
1005+ fileElement = {'dataset' : open (file_path , 'rb' )}
8581006 data ['description' ]= data .get ('description' )
8591007 data .pop ('dataset' , None )
8601008
8611009 try :
8621010 response = requests .post (url , data = data , files = fileElement )
863- except URLError , error :
864- print error
1011+ except URLError as error :
1012+ print ( error )
8651013
8661014 return response .status_code , response
8671015 else :
8681016 raise "File doesn't exists"
1017+
8691018 else :
8701019 data = urlencode (data )
8711020 data = data .encode ('utf-8' )
@@ -907,10 +1056,11 @@ def _read_url(self, url, data=None, filePath=None):
9071056 string .write (chunk )
9081057 return return_code , string .getvalue ()
9091058
910- def upload_dataset (self , description , filePath = None ):
1059+ def upload_dataset (self , description , file_path = None ):
9111060 try :
9121061 data = {'description' : description }
913- return_code , dataset_xml = self ._perform_api_call ("/data/" ,data = data , filePath = filePath )
1062+ return_code , dataset_xml = self ._perform_api_call (
1063+ "/data/" , data = data , file_path = file_path )
9141064
9151065 except URLError as e :
9161066 # TODO logger.debug
@@ -921,7 +1071,8 @@ def upload_dataset(self, description, filePath=None):
9211071 def upload_flow (self , description , binary , source ):
9221072 try :
9231073 data = {'description' : description , 'binary' : binary , 'source' : source }
924- return_code , dataset_xml = self ._perform_api_call ("openml.implementation.upload" , data = data )
1074+ return_code , dataset_xml = self ._perform_api_call (
1075+ "openml.implementation.upload" , data = data )
9251076
9261077 except URLError as e :
9271078 # TODO logger.debug
0 commit comments