44
55import arff
66import xmltodict
7+ from sklearn .base import BaseEstimator
78
9+ import openml
810from ..tasks import get_task
911from .._api_calls import _perform_api_call
10-
12+ from .. exceptions import PyOpenMLError
1113
1214class OpenMLRun (object ):
1315 """OpenML Run: result of running a model on an openml dataset.
@@ -17,10 +19,10 @@ class OpenMLRun(object):
1719 FIXME
1820
1921 """
20- def __init__ (self , task_id , flow_id , dataset_id , setup_string = None ,
22+ def __init__ (self , task_id , flow_id , dataset_id , setup_string = None ,
2123 files = None , setup_id = None , tags = None , uploader = None , uploader_name = None ,
2224 evaluations = None , detailed_evaluations = None ,
23- data_content = None , model = None , task_type = None ,
25+ data_content = None , trace_content = None , model = None , task_type = None ,
2426 task_evaluation_measure = None , flow_name = None ,
2527 parameter_settings = None , predictions_url = None , task = None ,
2628 flow = None , run_id = None ):
@@ -39,12 +41,14 @@ def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
3941 self .evaluations = evaluations
4042 self .detailed_evaluations = detailed_evaluations
4143 self .data_content = data_content
44+ self .trace_content = trace_content
4245 self .task = task
4346 self .flow = flow
4447 self .run_id = run_id
48+ self .model = model
4549
4650 def _generate_arff_dict (self ):
47- """Generates the arff dictionary for upload to the server.
51+ """Generates the arff dictionary for uploading predictions to the server.
4852
4953 Assumes that the run has been executed.
5054
@@ -74,6 +78,20 @@ def _generate_arff_dict(self):
7478 arff_dict ['relation' ] = 'openml_task_' + str (task .task_id ) + '_predictions'
7579 return arff_dict
7680
81+ def _generate_trace_arff_dict (self ):
82+ if self .trace_content is None :
83+ raise ValueError ('No trace content avaiable. (This should never happen.)' )
84+ arff_dict = {}
85+ arff_dict ['attributes' ] = [('repeat' , 'NUMERIC' ),
86+ ('fold' , 'NUMERIC' ),
87+ ('iteration' , 'NUMERIC' ),
88+ ('setup_string' , 'STRING' ),
89+ ('evaluation' , 'NUMERIC' ),
90+ ('selected' , ['true' , 'false' ])]
91+ arff_dict ['data' ] = self .trace_content
92+ arff_dict ['relation' ] = 'openml_task_' + str (self .task_id ) + '_predictions'
93+ return arff_dict
94+
7795 def publish (self ):
7896 """Publish a run to the OpenML server.
7997
@@ -84,10 +102,18 @@ def publish(self):
84102 -------
85103 self : OpenMLRun
86104 """
105+ if self .model is None :
106+ raise PyOpenMLError ("OpenMLRun obj does not contain a model. (This should never happen.) " );
107+
87108 predictions = arff .dumps (self ._generate_arff_dict ())
88109 description_xml = self ._create_description_xml ()
89- file_elements = {'predictions' : ("predictions.csv" , predictions ),
110+
111+ file_elements = {'predictions' : ("predictions.arff" , predictions ),
90112 'description' : ("description.xml" , description_xml )}
113+ if self .trace_content is not None :
114+ trace_arff = arff .dumps (self ._generate_trace_arff_dict ())
115+ file_elements ['trace' ] = ("trace.arff" , trace_arff )
116+
91117 return_code , return_value = _perform_api_call (
92118 "/run/" , file_elements = file_elements )
93119 run_id = int (xmltodict .parse (return_value )['oml:upload_run' ]['oml:run_id' ])
@@ -104,7 +130,11 @@ def _create_description_xml(self):
104130 """
105131 run_environment = _get_version_information ()
106132
107- parameter_settings = self .model .get_params ()
133+ # TODO: don't we have flow object in data structure? Use this one
134+ downloaded_flow = openml .flows .get_flow (self .flow_id )
135+
136+ openml_param_settings = _parse_parameters (self .model , downloaded_flow )
137+
108138 # as a tag, it must be of the form ([a-zA-Z0-9_\-\.])+
109139 # so we format time from 'mm/dd/yy hh:mm:ss' to 'mm-dd-yy_hh.mm.ss'
110140 well_formatted_time = time .strftime ("%c" ).replace (
@@ -113,11 +143,33 @@ def _create_description_xml(self):
113143 [self .model .__module__ + "." + self .model .__class__ .__name__ ]
114144 description = _to_dict (taskid = self .task_id , flow_id = self .flow_id ,
115145 setup_string = _create_setup_string (self .model ),
116- parameter_settings = parameter_settings ,
146+ parameter_settings = openml_param_settings ,
117147 tags = tags )
118148 description_xml = xmltodict .unparse (description , pretty = True )
119149 return description_xml
120150
def _parse_parameters(model, flow):
    """Convert the top-level parameters of a model into OpenML settings.

    Parameters
    ----------
    model : object
        Object exposing ``get_params()``; the result is read as a mapping
        from parameter name to parameter value.
    flow : object
        Flow passed to ``openml.flows.get_flow_dict``; its ``name`` is used
        to look up the component entry in the returned dictionary.

    Returns
    -------
    openml_param_settings : list of OrderedDict
        One entry for every parameter whose name does not contain "__".
        Each entry has the keys 'oml:name', 'oml:value' (``str`` of the
        parameter value) and 'oml:component'.
    """
    python_param_settings = model.get_params()
    openml_param_settings = []
    flow_dict = openml.flows.get_flow_dict(flow)

    for name, value in python_param_settings.items():
        if "__" in name:
            # Parameter of a subflow; it is not emitted by this function.
            continue

        # TODO: extract the parameters of estimator-valued parameters
        # (subflows) individually. For now such a parameter is recorded
        # like any other one, through its string representation. The
        # earlier unused ``flow.components[name]`` lookup was dropped: it
        # never influenced the result and could only raise.
        param_dict = OrderedDict()
        param_dict['oml:name'] = name
        param_dict['oml:value'] = str(value)
        param_dict['oml:component'] = flow_dict[flow.name]
        openml_param_settings.append(param_dict)

    return openml_param_settings
172+
121173################################################################################
122174# Functions which cannot be in runs/functions due to circular imports
123175
@@ -169,15 +221,7 @@ def _to_dict(taskid, flow_id, setup_string, parameter_settings, tags):
169221 description ['oml:run' ]['@xmlns:oml' ] = 'http://openml.org/openml'
170222 description ['oml:run' ]['oml:task_id' ] = taskid
171223 description ['oml:run' ]['oml:flow_id' ] = flow_id
172-
173- params = []
174- for k , v in parameter_settings .items ():
175- param_dict = OrderedDict ()
176- param_dict ['oml:name' ] = k
177- param_dict ['oml:value' ] = ('None' if v is None else v )
178- params .append (param_dict )
179-
180- description ['oml:run' ]['oml:parameter_setting' ] = params
224+ description ['oml:run' ]['oml:parameter_setting' ] = parameter_settings
181225 description ['oml:run' ]['oml:tag' ] = tags # Tags describing the run
182226 # description['oml:run']['oml:output_data'] = 0;
183227 # all data that was output of this run, which can be evaluation scores
0 commit comments