feature request #208

janvanrijn · janvanrijn · commit 49b2d38b2161 · 2017-03-20T23:53:50.000+01:00
- added runtime information (Python version >= 3.3)
- fixed get run cache bug
- removed run tags that I don't like
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -5,14 +5,15 @@
 import numpy as np
 import warnings
 import sklearn
+import time
 from sklearn.model_selection._search import BaseSearchCV
 
 from build.lib.openml.exceptions import PyOpenMLError
 from .. import config
 from ..flows import sklearn_to_flow, get_flow
 from ..setups import setup_exists
 from ..exceptions import OpenMLCacheException, OpenMLServerException
-from ..util import URLError
+from ..util import URLError, version_complies
 from ..tasks.functions import _create_task_from_xml
 from .._api_calls import _perform_api_call
 from .run import OpenMLRun
@@ -70,7 +71,7 @@ def run_task(task, model):
     run = OpenMLRun(task_id=task.task_id, flow_id=flow_id, dataset_id=dataset.dataset_id, model=model)
 
     try:
-        run.data_content, run.trace_content = _run_task_get_arffcontent(model, task, class_labels)
+        run.data_content, run.trace_content, run.detailed_evaluations = _run_task_get_arffcontent(model, task, class_labels)
     except PyOpenMLError as message:
         run.error_message = str(message)
         warnings.warn("Run terminated with error: %s" %run.error_message)
@@ -141,6 +142,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
     X, Y = task.get_X_and_y()
     arff_datacontent = []
     arff_tracecontent = []
+    user_defined_measures = defaultdict(lambda: defaultdict(dict))
 
     rep_no = 0
     # TODO use different iterator to only provide a single iterator (less
@@ -156,8 +158,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
             testY = Y[test_indices]
 
             try:
+                # for measuring runtime. Only available since Python 3.3
+                if version_complies(3, 3):
+                    modelfit_starttime = time.process_time()
                 model_fold.fit(trainX, trainY)
 
+                if version_complies(3, 3):
+                    modelfit_duration = time.process_time() - modelfit_starttime
+                    user_defined_measures['usercpu_time_millis_training'][rep_no][fold_no] = modelfit_duration
+
                 if isinstance(model_fold, BaseSearchCV):
                     _add_results_to_arfftrace(arff_tracecontent, fold_no, model_fold, rep_no)
                     model_classes = model_fold.best_estimator_.classes_
@@ -167,8 +176,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
                 # typically happens when training a regressor on classification task
                 raise PyOpenMLError(str(e))
 
+            if version_complies(3, 3):
+                modelpredict_starttime = time.process_time()
             ProbaY = model_fold.predict_proba(testX)
             PredY = model_fold.predict(testX)
+            if version_complies(3, 3):
+                modelpredict_duration = time.process_time() - modelpredict_starttime
+                user_defined_measures['usercpu_time_millis_testing'][rep_no][fold_no] = modelpredict_duration
+                user_defined_measures['usercpu_time_millis'][rep_no][fold_no] = modelfit_duration + modelpredict_duration
+
             if ProbaY.shape[1] != len(class_labels):
                 warnings.warn("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" %(rep_no, fold_no, ProbaY.shape[1], len(class_labels)))
 
@@ -182,10 +198,20 @@ def _run_task_get_arffcontent(model, task, class_labels):
     if not isinstance(model, BaseSearchCV):
         arff_tracecontent = None
 
-    return arff_datacontent, arff_tracecontent
+    return arff_datacontent, arff_tracecontent, user_defined_measures
 
 
 def _add_results_to_arfftrace(arff_tracecontent, fold_no, model, rep_no):
+    '''
+    Extracts the various results calculated by `BaseSearchCV` classes into openml trace arff format
+
+    :param arff_tracecontent: the list that the results should be appended to
+    :param fold_no: cv fold number
+    :param model: the model to extract from
+    :param rep_no: cv repetition number
+
+    :return: A list of lists, each representing an arff line
+    '''
     for itt_no in range(0, len(model.cv_results_['mean_test_score'])):
         # we use the string values for True and False, as it is defined in this way by the OpenML server
         selected = 'false'
@@ -350,7 +376,7 @@ def _get_cached_run(run_id):
         run_file = os.path.join(run_cache_dir,
                                 "run_%d.xml" % int(run_id))
         with io.open(run_file, encoding='utf8') as fh:
-            run = _create_task_from_xml(xml=fh.read())
+            run = _create_run_from_xml(xml=fh.read())
         return run
 
     except (OSError, IOError):
diff --git a/openml/runs/run.py b/openml/runs/run.py
@@ -170,14 +170,16 @@ def _create_description_xml(self):
 
         # as a tag, it must be of the form ([a-zA-Z0-9_\-\.])+
         # so we format time from 'mm/dd/yy hh:mm:ss' to 'mm-dd-yy_hh.mm.ss'
-        well_formatted_time = time.strftime("%c").replace(
-            ' ', '_').replace('/', '-').replace(':', '.')
-        tags = run_environment + [well_formatted_time] + ['run_task'] + \
-            [self.model.__module__ + "." + self.model.__class__.__name__]
+        # well_formatted_time = time.strftime("%c").replace(
+        #     ' ', '_').replace('/', '-').replace(':', '.')
+        # tags = run_environment + [well_formatted_time] + ['run_task'] + \
+        #     [self.model.__module__ + "." + self.model.__class__.__name__]
+        tags = ['openml-python', run_environment[1]]
         description = _to_dict(taskid=self.task_id, flow_id=self.flow_id,
                                setup_string=_create_setup_string(self.model),
                                parameter_settings=openml_param_settings,
                                error_message=self.error_message,
+                               detailed_evaluations=self.detailed_evaluations,
                                tags=tags)
         description_xml = xmltodict.unparse(description, pretty=True)
         return description_xml
@@ -266,7 +268,7 @@ def _get_version_information():
     return [python_version, sklearn_version, numpy_version, scipy_version]
 
 
-def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, tags):
+def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, tags=None, detailed_evaluations=None):
     """ Creates a dictionary corresponding to the desired xml desired by openML
 
     Parameters
@@ -293,11 +295,17 @@ def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, t
     if error_message is not None:
         description['oml:run']['oml:error_message'] = error_message
     description['oml:run']['oml:parameter_setting'] = parameter_settings
-    description['oml:run']['oml:tag'] = tags  # Tags describing the run
-    # description['oml:run']['oml:output_data'] = 0;
-    # all data that was output of this run, which can be evaluation scores
-    # (though those are also calculated serverside)
-    # must be of special data type
+    if tags is not None:
+        description['oml:run']['oml:tag'] = tags  # Tags describing the run
+    if detailed_evaluations is not None:
+        description['oml:run']['oml:output_data'] = dict()
+        description['oml:run']['oml:output_data']['oml:evaluation'] = list()
+        for measure in detailed_evaluations:
+            for repeat in detailed_evaluations[measure]:
+                for fold, value in detailed_evaluations[measure][repeat].items():
+                    current = OrderedDict([('@repeat', str(repeat)), ('@fold', str(fold)),
+                                           ('oml:name', measure), ('oml:value', str(value))])
+                    description['oml:run']['oml:output_data']['oml:evaluation'].append(current)
     return description
 
 
diff --git a/openml/util.py b/openml/util.py
@@ -12,5 +12,15 @@ def is_string(obj):
     except NameError:
         return isinstance(obj, str)
 
+def version_complies(major, minor=None):
+    """Return True if the running Python is at least version major[.minor]."""
+    running_major, running_minor = sys.version_info[:2]
+    if running_major != major:
+        # a differing major version settles the comparison on its own
+        return running_major > major
+    # same major version: only the minor version (if requested) matters
+    if minor is None:
+        return True
+    return running_minor >= minor
 
 __all__ = ['URLError', 'is_string']
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
@@ -117,7 +117,7 @@ def test__run_task_get_arffcontent(self):
                                 clf, task, class_labels)
 
         clf = SGDClassifier(loss='log', random_state=1)
-        arff_datacontent, arff_tracecontent = openml.runs.functions._run_task_get_arffcontent(
+        arff_datacontent, arff_tracecontent, _ = openml.runs.functions._run_task_get_arffcontent(
             clf, task, class_labels)
         # predictions
         self.assertIsInstance(arff_datacontent, list)