Skip to content

Commit be9dd3d

Browse files
committed
Added tag mechanisms for runs (#214)
1 parent 0497040 commit be9dd3d

3 files changed

Lines changed: 38 additions & 32 deletions

File tree

openml/runs/functions.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from ..util import URLError
1616
from ..tasks.functions import _create_task_from_xml
1717
from .._api_calls import _perform_api_call
18-
from .run import OpenMLRun
18+
from .run import OpenMLRun, _get_version_information
1919

2020

2121
# _get_version_info, _get_dict and _create_setup_string are in run.py to avoid
@@ -66,8 +66,10 @@ def run_task(task, model):
6666
raise ValueError('The task has no class labels. This method currently '
6767
'only works for tasks with class labels.')
6868

69+
run_environment = _get_version_information()
70+
tags = ['openml-python', run_environment[1]]
6971
# execute the run
70-
run = OpenMLRun(task_id=task.task_id, flow_id=flow_id, dataset_id=dataset.dataset_id, model=model)
72+
run = OpenMLRun(task_id=task.task_id, flow_id=flow_id, dataset_id=dataset.dataset_id, model=model, tags=tags)
7173

7274
try:
7375
run.data_content, run.trace_content, run.trace_attributes = _run_task_get_arffcontent(model, task, class_labels)
@@ -337,27 +339,31 @@ def _create_run_from_xml(xml):
337339
evaluations = dict()
338340
detailed_evaluations = defaultdict(lambda: defaultdict(dict))
339341
evaluation_flows = dict()
340-
for evaluation_dict in run['oml:output_data']['oml:evaluation']:
341-
key = evaluation_dict['oml:name']
342-
if 'oml:value' in evaluation_dict:
343-
value = float(evaluation_dict['oml:value'])
344-
elif 'oml:array_data' in evaluation_dict:
345-
value = evaluation_dict['oml:array_data']
346-
else:
347-
raise ValueError('Could not find keys "value" or "array_data" '
348-
'in %s' % str(evaluation_dict.keys()))
349-
350-
if '@repeat' in evaluation_dict and '@fold' in evaluation_dict:
351-
repeat = int(evaluation_dict['@repeat'])
352-
fold = int(evaluation_dict['@fold'])
353-
repeat_dict = detailed_evaluations[key]
354-
fold_dict = repeat_dict[repeat]
355-
fold_dict[fold] = value
356-
else:
357-
evaluations[key] = value
358-
evaluation_flows[key] = flow_id
342+
if 'oml:output_data' in run and 'oml:evaluation' in run['oml:output_data']:
343+
for evaluation_dict in run['oml:output_data']['oml:evaluation']:
344+
key = evaluation_dict['oml:name']
345+
if 'oml:value' in evaluation_dict:
346+
value = float(evaluation_dict['oml:value'])
347+
elif 'oml:array_data' in evaluation_dict:
348+
value = evaluation_dict['oml:array_data']
349+
else:
350+
raise ValueError('Could not find keys "value" or "array_data" '
351+
'in %s' % str(evaluation_dict.keys()))
352+
353+
if '@repeat' in evaluation_dict and '@fold' in evaluation_dict:
354+
repeat = int(evaluation_dict['@repeat'])
355+
fold = int(evaluation_dict['@fold'])
356+
repeat_dict = detailed_evaluations[key]
357+
fold_dict = repeat_dict[repeat]
358+
fold_dict[fold] = value
359+
else:
360+
evaluations[key] = value
361+
evaluation_flows[key] = flow_id
359362

360-
evaluation_flows[key] = flow_id
363+
evaluation_flows[key] = flow_id
364+
tags = None
365+
if 'oml:tag' in run:
366+
tags = run['oml:tag']
361367

362368
return OpenMLRun(run_id=run_id, uploader=uploader,
363369
uploader_name=uploader_name, task_id=task_id,
@@ -368,7 +374,7 @@ def _create_run_from_xml(xml):
368374
parameter_settings=parameters,
369375
dataset_id=dataset_id, predictions_url=predictions_url,
370376
evaluations=evaluations,
371-
detailed_evaluations=detailed_evaluations)
377+
detailed_evaluations=detailed_evaluations, tags=tags)
372378

373379

374380
def _get_cached_run(run_id):

openml/runs/run.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
4848
self.flow = flow
4949
self.run_id = run_id
5050
self.model = model
51+
self.tags = tags
5152

5253
def _generate_arff_dict(self):
5354
"""Generates the arff dictionary for uploading predictions to the server.
@@ -142,24 +143,17 @@ def _create_description_xml(self):
142143
xml_string : string
143144
XML description of run.
144145
"""
145-
run_environment = _get_version_information()
146146

147147
# TODO: don't we have flow object in data structure? Use this one
148148
downloaded_flow = openml.flows.get_flow(self.flow_id)
149149

150150
openml_param_settings = OpenMLRun._parse_parameters(self.model, downloaded_flow)
151151

152-
# as a tag, it must be of the form ([a-zA-Z0-9_\-\.])+
153-
# so we format time from 'mm/dd/yy hh:mm:ss' to 'mm-dd-yy_hh.mm.ss'
154-
well_formatted_time = time.strftime("%c").replace(
155-
' ', '_').replace('/', '-').replace(':', '.')
156-
tags = run_environment + [well_formatted_time] + ['run_task'] + \
157-
[self.model.__module__ + "." + self.model.__class__.__name__]
158152
description = _to_dict(taskid=self.task_id, flow_id=self.flow_id,
159153
setup_string=_create_setup_string(self.model),
160154
parameter_settings=openml_param_settings,
161155
error_message=self.error_message,
162-
tags=tags)
156+
tags=self.tags)
163157
description_xml = xmltodict.unparse(description, pretty=True)
164158
return description_xml
165159

tests/test_runs/test_run_functions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@ def test_run_iris(self):
5757
num_instances = 150
5858

5959
clf = LogisticRegression()
60-
self._perform_run(task_id,num_instances, clf)
60+
res = self._perform_run(task_id,num_instances, clf)
61+
62+
downloaded = openml.runs.get_run(res.run_id)
63+
assert('openml-python' in downloaded.tags)
6164

6265
def test_run_optimize_randomforest_iris(self):
6366
task_id = 10107
@@ -141,6 +144,7 @@ def test__run_task_get_arffcontent(self):
141144
'Iris-virginica'])
142145

143146
def test_get_run(self):
147+
openml.config.server = self.production_server
144148
run = openml.runs.get_run(473350)
145149
self.assertEqual(run.dataset_id, 1167)
146150
self.assertEqual(run.evaluations['f_measure'], 0.624668)
@@ -155,6 +159,8 @@ def test_get_run(self):
155159
(8, 0.56759),
156160
(9, 0.64621)]:
157161
self.assertEqual(run.detailed_evaluations['f_measure'][0][i], value)
162+
assert('weka' in run.tags)
163+
assert('stacking' in run.tags)
158164

159165
def _check_run(self, run):
160166
self.assertIsInstance(run, dict)

0 commit comments

Comments (0)