Skip to content

Commit 8f8ec4e

Browse files
authored
Merge pull request #228 from openml/issue208
Issue208
2 parents 4b15e84 + 39c2ea9 commit 8f8ec4e

3 files changed

Lines changed: 47 additions & 14 deletions

File tree

openml/runs/functions.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
import numpy as np
66
import warnings
77
import sklearn
8+
import time
9+
from sklearn.model_selection._search import BaseSearchCV
810

911
from ..exceptions import PyOpenMLError
1012
from .. import config
1113
from ..flows import sklearn_to_flow, get_flow, flow_exists
1214
from ..setups import setup_exists
1315
from ..exceptions import OpenMLCacheException, OpenMLServerException
14-
from ..util import URLError
16+
from ..util import URLError, version_complies
1517
from ..tasks.functions import _create_task_from_xml
1618
from .._api_calls import _perform_api_call
1719
from .run import OpenMLRun
@@ -68,7 +70,6 @@ def run_task(task, model, avoid_duplicate_runs=True):
6870
run = OpenMLRun(task_id=task.task_id, flow_id=None, dataset_id=dataset.dataset_id, model=model)
6971
run.data_content, run.trace_content, run.trace_attributes = _run_task_get_arffcontent(model, task, class_labels)
7072

71-
7273
if flow_id == False:
7374
# means the flow did not exist. As we could run it, publish it now
7475
flow = flow.publish()
@@ -151,6 +152,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
151152
X, Y = task.get_X_and_y()
152153
arff_datacontent = []
153154
arff_tracecontent = []
155+
user_defined_measures = defaultdict(lambda: defaultdict(dict))
154156

155157
rep_no = 0
156158
# TODO use different iterator to only provide a single iterator (less
@@ -166,8 +168,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
166168
testY = Y[test_indices]
167169

168170
try:
171+
# for measuring runtime. Only available since Python 3.3
172+
if version_complies(3, 3):
173+
modelfit_starttime = time.process_time()
169174
model_fold.fit(trainX, trainY)
170175

176+
if version_complies(3, 3):
177+
modelfit_duration = (time.process_time() - modelfit_starttime) * 1000
178+
user_defined_measures['usercpu_time_millis_training'][rep_no][fold_no] = modelfit_duration
179+
171180
if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
172181
arff_tracecontent.extend(_extract_arfftrace(model_fold, rep_no, fold_no))
173182
model_classes = model_fold.best_estimator_.classes_
@@ -177,8 +186,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
177186
# typically happens when training a regressor on classification task
178187
raise PyOpenMLError(str(e))
179188

189+
if version_complies(3, 3):
190+
modelpredict_starttime = time.process_time()
180191
ProbaY = model_fold.predict_proba(testX)
181192
PredY = model_fold.predict(testX)
193+
if version_complies(3, 3):
194+
modelpredict_duration = (time.process_time() - modelpredict_starttime) * 1000
195+
user_defined_measures['usercpu_time_millis_testing'][rep_no][fold_no] = modelpredict_duration
196+
user_defined_measures['usercpu_time_millis'][rep_no][fold_no] = modelfit_duration + modelpredict_duration
197+
182198
if ProbaY.shape[1] != len(class_labels):
183199
warnings.warn("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" %(rep_no, fold_no, ProbaY.shape[1], len(class_labels)))
184200

@@ -195,7 +211,6 @@ def _run_task_get_arffcontent(model, task, class_labels):
195211
else:
196212
arff_tracecontent = None
197213
arff_trace_attributes = None
198-
199214
return arff_datacontent, arff_tracecontent, arff_trace_attributes
200215

201216

@@ -397,7 +412,7 @@ def _get_cached_run(run_id):
397412
run_file = os.path.join(run_cache_dir,
398413
"run_%d.xml" % int(run_id))
399414
with io.open(run_file, encoding='utf8') as fh:
400-
run = _create_task_from_xml(xml=fh.read())
415+
run = _create_run_from_xml(xml=fh.read())
401416
return run
402417

403418
except (OSError, IOError):

openml/runs/run.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,16 @@ def _create_description_xml(self):
151151

152152
# as a tag, it must be of the form ([a-zA-Z0-9_\-\.])+
153153
# so we format time from 'mm/dd/yy hh:mm:ss' to 'mm-dd-yy_hh.mm.ss'
154-
well_formatted_time = time.strftime("%c").replace(
155-
' ', '_').replace('/', '-').replace(':', '.')
156-
tags = run_environment + [well_formatted_time] + ['run_task'] + \
157-
[self.model.__module__ + "." + self.model.__class__.__name__]
154+
# well_formatted_time = time.strftime("%c").replace(
155+
# ' ', '_').replace('/', '-').replace(':', '.')
156+
# tags = run_environment + [well_formatted_time] + ['run_task'] + \
157+
# [self.model.__module__ + "." + self.model.__class__.__name__]
158+
tags = ['openml-python', run_environment[1]]
158159
description = _to_dict(taskid=self.task_id, flow_id=self.flow_id,
159160
setup_string=_create_setup_string(self.model),
160161
parameter_settings=openml_param_settings,
161162
error_message=self.error_message,
163+
detailed_evaluations=self.detailed_evaluations,
162164
tags=tags)
163165
description_xml = xmltodict.unparse(description, pretty=True)
164166
return description_xml
@@ -247,7 +249,7 @@ def _get_version_information():
247249
return [python_version, sklearn_version, numpy_version, scipy_version]
248250

249251

250-
def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, tags):
252+
def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, tags=None, detailed_evaluations=None):
251253
""" Creates a dictionary corresponding to the xml desired by openML
252254
253255
Parameters
@@ -274,11 +276,17 @@ def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, t
274276
if error_message is not None:
275277
description['oml:run']['oml:error_message'] = error_message
276278
description['oml:run']['oml:parameter_setting'] = parameter_settings
277-
description['oml:run']['oml:tag'] = tags # Tags describing the run
278-
# description['oml:run']['oml:output_data'] = 0;
279-
# all data that was output of this run, which can be evaluation scores
280-
# (though those are also calculated serverside)
281-
# must be of special data type
279+
if tags is not None:
280+
description['oml:run']['oml:tag'] = tags # Tags describing the run
281+
if detailed_evaluations is not None:
282+
description['oml:run']['oml:output_data'] = dict()
283+
description['oml:run']['oml:output_data']['oml:evaluation'] = list()
284+
for measure in detailed_evaluations:
285+
for repeat in detailed_evaluations[measure]:
286+
for fold, value in detailed_evaluations[measure][repeat].items():
287+
current = OrderedDict([('@repeat', str(repeat)), ('@fold', str(fold)),
288+
('oml:name', measure), ('oml:value', str(value))])
289+
description['oml:run']['oml:output_data']['oml:evaluation'].append(current)
282290
return description
283291

284292

openml/util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,15 @@ def is_string(obj):
1212
except NameError:
1313
return isinstance(obj, str)
1414

15+
def version_complies(major, minor=None):
    """Check whether the running Python interpreter is at least a given version.

    Parameters
    ----------
    major : int
        Minimum required major version.
    minor : int, optional
        Minimum required minor version. When ``None`` only the major
        version is compared.

    Returns
    -------
    bool
        True if the interpreter version is greater than or equal to
        ``major`` (and ``minor``, when given), False otherwise.
    """
    running_major, running_minor = sys.version_info[0], sys.version_info[1]
    if minor is None:
        # Only the major version matters when no minor bound is requested.
        return running_major >= major
    # Tuples compare lexicographically: a larger major wins outright and the
    # minor version only decides the outcome when the majors are equal.
    return (running_major, running_minor) >= (major, minor)
1525

1626
# Public names of this module; version_complies is imported by other
# subpackages, so it is declared here alongside the existing helpers.
__all__ = ['URLError', 'is_string', 'version_complies']

0 commit comments

Comments
 (0)