55import arff
66import xmltodict
77from sklearn .base import BaseEstimator
8+ from sklearn .model_selection ._search import BaseSearchCV
89
910import openml
1011from ..tasks import get_task
@@ -78,18 +79,48 @@ def _generate_arff_dict(self):
7879 arff_dict ['relation' ] = 'openml_task_' + str (task .task_id ) + '_predictions'
7980 return arff_dict
8081
def _generate_trace_arff_dict(self, model):
    """Generate the ARFF dictionary describing the optimization trace.

    Assumes that the run has been executed, i.e. ``self.trace_content``
    has been populated.

    Parameters
    ----------
    model : BaseSearchCV
        Search object whose ``cv_results_`` is inspected. Every key that
        starts with ``param_`` contributes one ``parameter_<name>``
        attribute to the ARFF header.

    Returns
    -------
    arff_dict : dict
        Dictionary representation of the ARFF file that will be uploaded.
        Contains information about the optimization trace.

    Raises
    ------
    ValueError
        If ``self.trace_content`` is None.
    PyOpenMLError
        If ``model`` is not a ``BaseSearchCV`` instance.
    """
    if self.trace_content is None:
        raise ValueError('No trace content avaiable.')
    if not isinstance(model, BaseSearchCV):
        raise PyOpenMLError('Cannot generate trace on provided classifier. (This should never happen.)')

    attributes = [('repeat', 'NUMERIC'),
                  ('fold', 'NUMERIC'),
                  ('iteration', 'NUMERIC'),
                  ('evaluation', 'NUMERIC'),
                  ('selected', ['true', 'false'])]

    prefix = 'param_'
    # Nominal attributes with this many (or more) distinct values are
    # declared as free-form STRING instead of being enumerated.
    max_nominal_values = 100

    for key in model.cv_results_:
        if not key.startswith(prefix):
            continue
        column = model.cv_results_[key]

        # bool must be tested before int/float: bool is a subclass of int.
        if all(isinstance(value, bool) for value in column):
            attribute_type = ['True', 'False']
        elif all(isinstance(value, (int, float)) for value in column):
            attribute_type = 'NUMERIC'
        else:
            # Stringify *before* de-duplicating so that unhashable values
            # (lists, dicts) do not raise and values sharing one string
            # form cannot produce duplicate nominal labels. Sorting makes
            # the header independent of set iteration order.
            nominal_values = sorted({str(value) for value in column})
            if len(nominal_values) < max_nominal_values:
                attribute_type = nominal_values
            else:
                attribute_type = 'STRING'

        attributes.append(('parameter_' + key[len(prefix):], attribute_type))

    arff_dict = {}
    arff_dict['attributes'] = attributes
    arff_dict['data'] = self.trace_content
    arff_dict['relation'] = 'openml_task_' + str(self.task_id) + '_predictions'
    return arff_dict
94125
95126 def publish (self ):
@@ -111,7 +142,7 @@ def publish(self):
111142 file_elements = {'predictions' : ("predictions.arff" , predictions ),
112143 'description' : ("description.xml" , description_xml )}
113144 if self .trace_content is not None :
114- trace_arff = arff .dumps (self ._generate_trace_arff_dict ())
145+ trace_arff = arff .dumps (self ._generate_trace_arff_dict (self . model ))
115146 file_elements ['trace' ] = ("trace.arff" , trace_arff )
116147
117148 return_code , return_value = _perform_api_call (
@@ -149,6 +180,16 @@ def _create_description_xml(self):
149180 return description_xml
150181
151182def _parse_parameters (model , flow ):
183+ """Extracts all parameter settings from an model in OpenML format.
184+
185+ Parameters
186+ ----------
187+ model
188+ the sci-kit learn model (fitted)
189+ flow
190+ openml flow object (containing flow ids, i.e., it has to be downloaded from the server)
191+
192+ """
152193 python_param_settings = model .get_params ()
153194 openml_param_settings = []
154195 flow_dict = openml .flows .get_flow_dict (flow )
@@ -160,12 +201,13 @@ def _parse_parameters(model, flow):
160201 if isinstance (python_param_settings [param ], BaseEstimator ):
161202 # extract parameters of the subflow individually
162203 subflow = flow .components [param ]
204+ openml_param_settings += _parse_parameters (python_param_settings [param ], subflow )
163205
164206 # add parameter setting (also the subflow. Just because we can)
165207 param_dict = OrderedDict ()
166- param_dict ['oml:name' ] = param ;
167- param_dict ['oml:value' ] = str (python_param_settings [param ]);
168- param_dict ['oml:component' ] = flow_dict [flow .name ];
208+ param_dict ['oml:name' ] = param
209+ param_dict ['oml:value' ] = str (python_param_settings [param ])
210+ param_dict ['oml:component' ] = flow_dict [flow .name ]
169211 openml_param_settings .append (param_dict )
170212
171213 return openml_param_settings
0 commit comments