Skip to content

Commit be9dd3d

Browse files
committed
Added tag mechanisms for runs (#214)
1 parent 0497040 commit be9dd3d

3 files changed

Lines changed: 38 additions & 32 deletions

File tree

openml/runs/functions.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from ..util import URLError
1616
from ..tasks.functions import _create_task_from_xml
1717
from .._api_calls import _perform_api_call
18-
from .run import OpenMLRun
18+
from .run import OpenMLRun, _get_version_information
1919

2020

2121
# _get_version_info, _get_dict and _create_setup_string are in run.py to avoid
@@ -66,8 +66,10 @@ def run_task(task, model):
6666
raise ValueError('The task has no class labels. This method currently '
6767
'only works for tasks with class labels.')
6868

69+
run_environment = _get_version_information()
70+
tags = ['openml-python', run_environment[1]]
6971
# execute the run
70-
run = OpenMLRun(task_id=task.task_id, flow_id=flow_id, dataset_id=dataset.dataset_id, model=model)
72+
run = OpenMLRun(task_id=task.task_id, flow_id=flow_id, dataset_id=dataset.dataset_id, model=model, tags=tags)
7173

7274
try:
7375
run.data_content, run.trace_content, run.trace_attributes = _run_task_get_arffcontent(model, task, class_labels)
@@ -337,27 +339,31 @@ def _create_run_from_xml(xml):
337339
evaluations = dict()
338340
detailed_evaluations = defaultdict(lambda: defaultdict(dict))
339341
evaluation_flows = dict()
340-
for evaluation_dict in run['oml:output_data']['oml:evaluation']:
341-
key = evaluation_dict['oml:name']
342-
if 'oml:value' in evaluation_dict:
343-
value = float(evaluation_dict['oml:value'])
344-
elif 'oml:array_data' in evaluation_dict:
345-
value = evaluation_dict['oml:array_data']
346-
else:
347-
raise ValueError('Could not find keys "value" or "array_data" '
348-
'in %s' % str(evaluation_dict.keys()))
349-
350-
if '@repeat' in evaluation_dict and '@fold' in evaluation_dict:
351-
repeat = int(evaluation_dict['@repeat'])
352-
fold = int(evaluation_dict['@fold'])
353-
repeat_dict = detailed_evaluations[key]
354-
fold_dict = repeat_dict[repeat]
355-
fold_dict[fold] = value
356-
else:
357-
evaluations[key] = value
358-
evaluation_flows[key] = flow_id
342+
if 'oml:output_data' in run and 'oml:evaluation' in run['oml:output_data']:
343+
for evaluation_dict in run['oml:output_data']['oml:evaluation']:
344+
key = evaluation_dict['oml:name']
345+
if 'oml:value' in evaluation_dict:
346+
value = float(evaluation_dict['oml:value'])
347+
elif 'oml:array_data' in evaluation_dict:
348+
value = evaluation_dict['oml:array_data']
349+
else:
350+
raise ValueError('Could not find keys "value" or "array_data" '
351+
'in %s' % str(evaluation_dict.keys()))
352+
353+
if '@repeat' in evaluation_dict and '@fold' in evaluation_dict:
354+
repeat = int(evaluation_dict['@repeat'])
355+
fold = int(evaluation_dict['@fold'])
356+
repeat_dict = detailed_evaluations[key]
357+
fold_dict = repeat_dict[repeat]
358+
fold_dict[fold] = value
359+
else:
360+
evaluations[key] = value
361+
evaluation_flows[key] = flow_id
359362

360-
evaluation_flows[key] = flow_id
363+
evaluation_flows[key] = flow_id
364+
tags = None
365+
if 'oml:tag' in run:
366+
tags = run['oml:tag']
361367

362368
return OpenMLRun(run_id=run_id, uploader=uploader,
363369
uploader_name=uploader_name, task_id=task_id,
@@ -368,7 +374,7 @@ def _create_run_from_xml(xml):
368374
parameter_settings=parameters,
369375
dataset_id=dataset_id, predictions_url=predictions_url,
370376
evaluations=evaluations,
371-
detailed_evaluations=detailed_evaluations)
377+
detailed_evaluations=detailed_evaluations, tags=tags)
372378

373379

374380
def _get_cached_run(run_id):

openml/runs/run.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
4848
self.flow = flow
4949
self.run_id = run_id
5050
self.model = model
51+
self.tags = tags
5152

5253
def _generate_arff_dict(self):
5354
"""Generates the arff dictionary for uploading predictions to the server.
@@ -142,24 +143,17 @@ def _create_description_xml(self):
142143
xml_string : string
143144
XML description of run.
144145
"""
145-
run_environment = _get_version_information()
146146

147147
# TODO: don't we have flow object in data structure? Use this one
148148
downloaded_flow = openml.flows.get_flow(self.flow_id)
149149

150150
openml_param_settings = OpenMLRun._parse_parameters(self.model, downloaded_flow)
151151

152-
# as a tag, it must be of the form ([a-zA-Z0-9_\-\.])+
153-
# so we format time from 'mm/dd/yy hh:mm:ss' to 'mm-dd-yy_hh.mm.ss'
154-
well_formatted_time = time.strftime("%c").replace(
155-
' ', '_').replace('/', '-').replace(':', '.')
156-
tags = run_environment + [well_formatted_time] + ['run_task'] + \
157-
[self.model.__module__ + "." + self.model.__class__.__name__]
158152
description = _to_dict(taskid=self.task_id, flow_id=self.flow_id,
159153
setup_string=_create_setup_string(self.model),
160154
parameter_settings=openml_param_settings,
161155
error_message=self.error_message,
162-
tags=tags)
156+
tags=self.tags)
163157
description_xml = xmltodict.unparse(description, pretty=True)
164158
return description_xml
165159

tests/test_runs/test_run_functions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@ def test_run_iris(self):
5757
num_instances = 150
5858

5959
clf = LogisticRegression()
60-
self._perform_run(task_id,num_instances, clf)
60+
res = self._perform_run(task_id,num_instances, clf)
61+
62+
downloaded = openml.runs.get_run(res.run_id)
63+
assert('openml-python' in downloaded.tags)
6164

6265
def test_run_optimize_randomforest_iris(self):
6366
task_id = 10107
@@ -141,6 +144,7 @@ def test__run_task_get_arffcontent(self):
141144
'Iris-virginica'])
142145

143146
def test_get_run(self):
147+
openml.config.server = self.production_server
144148
run = openml.runs.get_run(473350)
145149
self.assertEqual(run.dataset_id, 1167)
146150
self.assertEqual(run.evaluations['f_measure'], 0.624668)
@@ -155,6 +159,8 @@ def test_get_run(self):
155159
(8, 0.56759),
156160
(9, 0.64621)]:
157161
self.assertEqual(run.detailed_evaluations['f_measure'][0][i], value)
162+
assert('weka' in run.tags)
163+
assert('stacking' in run.tags)
158164

159165
def _check_run(self, run):
160166
self.assertIsInstance(run, dict)

0 commit comments

Comments (0)