@@ -23,8 +23,8 @@ class OpenMLRun(object):
2323 FIXME
2424
2525 """
26- def __init__ (self , task_id , flow_id , setup_string , dataset_id , files = None ,
27- setup_id = None , tags = None , uploader = None , uploader_name = None ,
26+ def __init__ (self , task_id , flow_id , dataset_id , setup_string = None ,
27+ files = None , setup_id = None , tags = None , uploader = None , uploader_name = None ,
2828 evaluations = None , detailed_evaluations = None ,
2929 data_content = None , model = None , task_type = None ,
3030 task_evaluation_measure = None , flow_name = None ,
@@ -49,15 +49,20 @@ def __init__(self, task_id, flow_id, setup_string, dataset_id, files=None,
4949 self .flow = flow
5050 self .run_id = run_id
5151
52- def _generate_arff (self ):
53- """Generates an arff for upload to server.
52+ def _generate_arff_dict (self ):
53+ """Generates the arff dictionary for upload to the server.
54+
55+ Assumes that the run has been executed.
5456
5557 Returns
5658 -------
57- arf_dict : dictionary
58- Dictionary representation of an ARFF data format containing
59- predictions and confidences .
59+ arff_dict : dict
60+ Dictionary representation of the ARFF file that will be uploaded.
61+ Contains predictions and information about the run environment .
6062 """
63+ if self .data_content is None :
64+ raise ValueError ('Run has not been executed.' )
65+
6166 run_environment = (_get_version_information () +
6267 [time .strftime ("%c" )] + ['Created by run_task()' ])
6368 task = get_task (self .task_id )
@@ -85,7 +90,7 @@ def publish(self):
8590 -------
8691 self : OpenMLRun
8792 """
88- predictions = arff .dumps (self ._generate_arff ())
93+ predictions = arff .dumps (self ._generate_arff_dict ())
8994 description_xml = self ._create_description_xml ()
9095 file_elements = {'predictions' : ("predictions.csv" , predictions ),
9196 'description' : ("description.xml" , description_xml )}
@@ -159,11 +164,19 @@ def run_task(task, model):
159164 'only works for tasks with class labels.' )
160165 setup_string = _create_setup_string (model )
161166
162- run = OpenMLRun (task_id = task .task_id , flow_id = flow_id ,
163- setup_string = setup_string , dataset_id = dataset .dataset_id ,
164- task = task , flow = flow )
167+ run = OpenMLRun (task_id = task .task_id , flow_id = flow_id , dataset_id = dataset .id , setup_string = setup_string )
168+ run .data_content = _run_task_get_arffcontent (model , task , class_labels )
169+
170+ # The model is not uploaded at the moment, but it is used to get the
171+ # hyperparameter values when uploading the run.
172+ X , Y = task .get_X_and_y ()
173+ run .model = model .fit (X , Y )
174+ return run
175+
165176
166- train_times = []
177+ def _run_task_get_arffcontent (model , task , class_labels ):
178+ X , Y = task .get_X_and_y ()
179+ arff_datacontent = []
167180
168181 rep_no = 0
169182 # TODO use different iterator to only provide a single iterator (less
@@ -177,26 +190,21 @@ def run_task(task, model):
177190 testX = X [test_indices ]
178191 testY = Y [test_indices ]
179192
180- start_time = time .time ()
181193 model .fit (trainX , trainY )
182194 ProbaY = model .predict_proba (testX )
183195 PredY = model .predict (testX )
184- end_time = time .time ()
185-
186- train_times .append (end_time - start_time )
187196
188197 for i in range (0 , len (test_indices )):
189- arff_line = [rep_no , fold_no , test_indices [i ],
190- class_labels [PredY [i ]], class_labels [testY [i ]]]
191- arff_line [3 :3 ] = ProbaY [i ]
198+ arff_line = [rep_no , fold_no , test_indices [i ]]
199+ arff_line .extend (ProbaY [i ])
200+ arff_line .append (class_labels [PredY [i ]])
201+ arff_line .append (class_labels [testY [i ]])
192202 arff_datacontent .append (arff_line )
193203
194204 fold_no = fold_no + 1
195205 rep_no = rep_no + 1
196206
197- run .data_content = arff_datacontent
198- run .model = model .fit (X , Y )
199- return run
207+ return arff_datacontent
200208
201209
202210def _to_dict (taskid , flow_id , setup_string , parameter_settings , tags ):
0 commit comments