Merge pull request #356 from amueller/task_tagging

mfeurer · web-flow · commit 08c9cf6e4ba2 · 2017-10-13T14:35:57.000+02:00
tagging for other stuff
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
@@ -139,6 +139,8 @@ def _parse_server_exception(response):
     additional = None
     if 'oml:additional_information' in server_exception['oml:error']:
         additional = server_exception['oml:error']['oml:additional_information']
-    if code in [370, 372]:
+    if code in [370, 372, 512, 500, 482]:
+        # No-result codes: 512 for runs, 370 for datasets (should be 372),
+        # 500 for flows, 482 for tasks
         return OpenMLServerNoResult(code, message, additional)
     return OpenMLServerException(code, message, additional)
diff --git a/openml/flows/flow.py b/openml/flows/flow.py
@@ -355,6 +355,28 @@ def publish(self):
                              (flow_id, message))
         return self
 
+    def push_tag(self, tag):
+        """Annotates this flow with a tag on the server.
+
+        Parameters
+        ----------
+        tag : str
+            Tag to attach to the flow.
+        """
+        data = {'flow_id': self.flow_id, 'tag': tag}
+        _perform_api_call("/flow/tag", data=data)
+
+    def remove_tag(self, tag):
+        """Removes a tag from this flow on the server.
+
+        Parameters
+        ----------
+        tag : str
+            Tag to remove from the flow.
+        """
+        data = {'flow_id': self.flow_id, 'tag': tag}
+        _perform_api_call("/flow/untag", data=data)
+
 
 def _copy_server_fields(source_flow, target_flow):
     fields_added_by_the_server = ['flow_id', 'uploader', 'version',
@@ -370,5 +392,3 @@ def _copy_server_fields(source_flow, target_flow):
 def _add_if_nonempty(dic, key, value):
     if value is not None:
         dic[key] = value
-
-
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
@@ -4,6 +4,7 @@
 import six
 
 from openml._api_calls import _perform_api_call
+from openml.exceptions import OpenMLServerNoResult
 from . import OpenMLFlow
 
 
@@ -70,7 +71,9 @@ def list_flows(offset=None, size=None, tag=None):
 
 
 def flow_exists(name, external_version):
-    """Retrieves the flow id of the flow uniquely identified by name + external_version.
+    """Retrieves the flow id.
+
+    A flow is uniquely identified by name + external_version.
 
     Parameter
     ---------
@@ -93,8 +96,9 @@ def flow_exists(name, external_version):
     if not (isinstance(name, six.string_types) and len(external_version) > 0):
         raise ValueError('Argument \'version\' should be a non-empty string')
 
-    xml_response = _perform_api_call("flow/exists",
-                                     data={'name': name, 'external_version': external_version})
+    xml_response = _perform_api_call(
+        "flow/exists", data={'name': name, 'external_version':
+                             external_version})
 
     result_dict = xmltodict.parse(xml_response)
     flow_id = int(result_dict['oml:flow_exists']['oml:id'])
@@ -105,15 +109,17 @@ def flow_exists(name, external_version):
 
 
 def _list_flows(api_call):
-    # TODO add proper error handling here!
-    xml_string = _perform_api_call(api_call)
+    try:
+        xml_string = _perform_api_call(api_call)
+    except OpenMLServerNoResult:
+        return []
     flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))
 
     # Minimalistic check if the XML is useful
     assert type(flows_dict['oml:flows']['oml:flow']) == list, \
         type(flows_dict['oml:flows'])
     assert flows_dict['oml:flows']['@xmlns:oml'] == \
-           'http://openml.org/openml', flows_dict['oml:flows']['@xmlns:oml']
+        'http://openml.org/openml', flows_dict['oml:flows']['@xmlns:oml']
 
     flows = dict()
     for flow_ in flows_dict['oml:flows']['oml:flow']:
@@ -190,10 +196,10 @@ def assert_flows_equal(flow1, flow2,
         attr2 = getattr(flow2, key, None)
         if key == 'components':
             for name in set(attr1.keys()).union(attr2.keys()):
-                if not name in attr1:
+                if name not in attr1:
                     raise ValueError('Component %s only available in '
                                      'argument2, but not in argument1.' % name)
-                if not name in attr2:
+                if name not in attr2:
                     raise ValueError('Component %s only available in '
                                      'argument2, but not in argument1.' % name)
                 assert_flows_equal(attr1[name], attr2[name],
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -14,7 +14,7 @@
 
 import openml
 import openml.utils
-from ..exceptions import PyOpenMLError
+from ..exceptions import PyOpenMLError, OpenMLServerNoResult
 from .. import config
 from ..flows import sklearn_to_flow, get_flow, flow_exists, _check_n_jobs, \
     _copy_server_fields
@@ -862,8 +862,10 @@ def list_runs(offset=None, size=None, id=None, task=None, setup=None,
 
 def _list_runs(api_call):
     """Helper function to parse API calls which are lists of runs"""
-
-    xml_string = _perform_api_call(api_call)
+    try:
+        xml_string = _perform_api_call(api_call)
+    except OpenMLServerNoResult:
+        return []
 
     runs_dict = xmltodict.parse(xml_string, force_list=('oml:run',))
     # Minimalistic check if the XML is useful
diff --git a/openml/runs/run.py b/openml/runs/run.py
@@ -1,4 +1,4 @@
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict
 import json
 import sys
 import time
@@ -12,6 +12,7 @@
 from .._api_calls import _perform_api_call, _file_id_to_url, _read_url_files
 from ..exceptions import PyOpenMLError
 
+
 class OpenMLRun(object):
     """OpenML Run: result of running a model on an openml dataset.
 
@@ -349,6 +350,28 @@ def extract_parameters(_flow, _flow_dict, component_model,
 
         return parameters
 
+    def push_tag(self, tag):
+        """Annotates this run with a tag on the server.
+
+        Parameters
+        ----------
+        tag : str
+            Tag to attach to the run.
+        """
+        data = {'run_id': self.run_id, 'tag': tag}
+        _perform_api_call("/run/tag", data=data)
+
+    def remove_tag(self, tag):
+        """Removes a tag from this run on the server.
+
+        Parameters
+        ----------
+        tag : str
+            Tag to remove from the run.
+        """
+        data = {'run_id': self.run_id, 'tag': tag}
+        _perform_api_call("/run/untag", data=data)
+
 
 ################################################################################
 # Functions which cannot be in runs/functions due to circular imports
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -7,7 +7,7 @@
 from oslo_concurrency import lockutils
 import xmltodict
 
-from ..exceptions import OpenMLCacheException
+from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
 from ..datasets import get_dataset
 from .task import OpenMLTask, _create_task_cache_dir
 from .. import config
@@ -55,9 +55,9 @@ def _get_estimation_procedure_list():
     Returns
     -------
     procedures : list
-        A list of all estimation procedures. Every procedure is represented by a
-        dictionary containing the following information: id,
-        task type id, name, type, repeats, folds, stratified.
+        A list of all estimation procedures. Every procedure is represented by
+        a dictionary containing the following information: id, task type id,
+        name, type, repeats, folds, stratified.
     """
 
     xml_string = _perform_api_call("estimationprocedure/list")
@@ -138,7 +138,10 @@ def list_tasks(task_type_id=None, offset=None, size=None, tag=None):
 
 
 def _list_tasks(api_call):
-    xml_string = _perform_api_call(api_call)
+    try:
+        xml_string = _perform_api_call(api_call)
+    except OpenMLServerNoResult:
+        return []
     tasks_dict = xmltodict.parse(xml_string, force_list=('oml:task',))
     # Minimalistic check if the XML is useful
     if 'oml:tasks' not in tasks_dict:
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
@@ -4,7 +4,7 @@
 from .. import config
 from .. import datasets
 from .split import OpenMLSplit
-from .._api_calls import _read_url
+from .._api_calls import _read_url, _perform_api_call
 
 
 class OpenMLTask(object):
@@ -92,6 +92,28 @@ def get_split_dimensions(self):
 
         return self.split.repeats, self.split.folds, self.split.samples
 
+    def push_tag(self, tag):
+        """Annotates this task with a tag on the server.
+
+        Parameters
+        ----------
+        tag : str
+            Tag to attach to the task.
+        """
+        data = {'task_id': self.task_id, 'tag': tag}
+        _perform_api_call("/task/tag", data=data)
+
+    def remove_tag(self, tag):
+        """Removes a tag from this task on the server.
+
+        Parameters
+        ----------
+        tag : str
+            Tag to remove from the task.
+        """
+        data = {'task_id': self.task_id, 'tag': tag}
+        _perform_api_call("/task/untag", data=data)
+
 
 def _create_task_cache_dir(task_id):
     task_cache_dir = os.path.join(config.get_cache_directory(), "tasks", str(task_id))
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
@@ -36,8 +36,8 @@ class TestFlow(TestBase):
     _multiprocess_can_split_ = True
 
     def test_get_flow(self):
-        # We need to use the production server here because 4024 is not the test
-        # server
+        # We need to use the production server here because flow 4024 is not
+        # on the test server
         openml.config.server = self.production_server
 
         flow = openml.flows.get_flow(4024)
@@ -67,6 +67,21 @@ def test_get_flow(self):
         self.assertEqual(subflow_3.parameters['L'], '-1')
         self.assertEqual(len(subflow_3.components), 0)
 
+    def test_tagging(self):
+        flow_list = openml.flows.list_flows(size=1)
+        flow_id = list(flow_list.keys())[0]
+        flow = openml.flows.get_flow(flow_id)
+        tag = "testing_tag_{}_{}".format(self.id(), time.time())
+        flow_list = openml.flows.list_flows(tag=tag)
+        self.assertEqual(len(flow_list), 0)
+        flow.push_tag(tag)
+        flow_list = openml.flows.list_flows(tag=tag)
+        self.assertEqual(len(flow_list), 1)
+        self.assertIn(flow_id, flow_list)
+        flow.remove_tag(tag)
+        flow_list = openml.flows.list_flows(tag=tag)
+        self.assertEqual(len(flow_list), 0)
+
     def test_from_xml_to_xml(self):
         # Get the raw xml thing
         # TODO maybe get this via get_flow(), which would have to be refactored to allow getting only the xml dictionary
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
@@ -1,43 +1,44 @@
+from time import time
+
 from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
 
 from openml.testing import TestBase
 from openml.flows.sklearn_converter import sklearn_to_flow
 from openml import OpenMLRun
+import openml
 
 
 class TestRun(TestBase):
-    # Splitting not helpful, these test's don't rely on the server and take less
-    # than 1 seconds
+    # Splitting not helpful, these tests don't rely on the server and take
+    # less than 1 second
 
     def test_parse_parameters_flow_not_on_server(self):
 
         model = LogisticRegression()
         flow = sklearn_to_flow(model)
-        self.assertRaisesRegexp(ValueError,
-                                'Flow sklearn.linear_model.logistic.LogisticRegression '
-                                'has no flow_id!',
-                                OpenMLRun._parse_parameters, flow)
+        self.assertRaisesRegexp(
+            ValueError, 'Flow sklearn.linear_model.logistic.LogisticRegression'
+            ' has no flow_id!', OpenMLRun._parse_parameters, flow)
 
         model = AdaBoostClassifier(base_estimator=LogisticRegression())
         flow = sklearn_to_flow(model)
         flow.flow_id = 1
-        self.assertRaisesRegexp(ValueError,
-                                'Flow sklearn.linear_model.logistic.LogisticRegression '
-                                'has no flow_id!',
-                                OpenMLRun._parse_parameters, flow)
+        self.assertRaisesRegexp(
+            ValueError, 'Flow sklearn.linear_model.logistic.LogisticRegression'
+            ' has no flow_id!', OpenMLRun._parse_parameters, flow)
 
     def test_parse_parameters(self):
 
         model = RandomizedSearchCV(
             estimator=RandomForestClassifier(n_estimators=5),
-            param_distributions={"max_depth": [3, None],
-                                 "max_features": [1, 2, 3, 4],
-                                 "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
-                                 "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
-                                 "bootstrap": [True, False],
-                                 "criterion": ["gini", "entropy"]},
+            param_distributions={
+                "max_depth": [3, None],
+                "max_features": [1, 2, 3, 4],
+                "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
+                "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                "bootstrap": [True, False], "criterion": ["gini", "entropy"]},
             cv=StratifiedKFold(n_splits=2, random_state=1),
             n_iter=5)
         flow = sklearn_to_flow(model)
@@ -49,3 +50,16 @@ def test_parse_parameters(self):
             if parameter['oml:name'] == 'n_estimators':
                 self.assertEqual(parameter['oml:value'], '5')
                 self.assertEqual(parameter['oml:component'], 2)
+
+    def test_tagging(self):
+        run = openml.runs.get_run(1)
+        tag = "testing_tag_{}_{}".format(self.id(), time())
+        run_list = openml.runs.list_runs(tag=tag)
+        self.assertEqual(len(run_list), 0)
+        run.push_tag(tag)
+        run_list = openml.runs.list_runs(tag=tag)
+        self.assertEqual(len(run_list), 1)
+        self.assertIn(1, run_list)
+        run.remove_tag(tag)
+        run_list = openml.runs.list_runs(tag=tag)
+        self.assertEqual(len(run_list), 0)
diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py
@@ -1,11 +1,11 @@
 import sys
-import types
 
 if sys.version_info[0] >= 3:
     from unittest import mock
 else:
     import mock
 
+from time import time
 import numpy as np
 
 import openml
@@ -45,6 +45,19 @@ def test_get_X_and_Y(self):
         self.assertIsInstance(Y, np.ndarray)
         self.assertEqual(Y.dtype, float)
 
+    def test_tagging(self):
+        task = openml.tasks.get_task(1)
+        tag = "testing_tag_{}_{}".format(self.id(), time())
+        task_list = openml.tasks.list_tasks(tag=tag)
+        self.assertEqual(len(task_list), 0)
+        task.push_tag(tag)
+        task_list = openml.tasks.list_tasks(tag=tag)
+        self.assertEqual(len(task_list), 1)
+        self.assertIn(1, task_list)
+        task.remove_tag(tag)
+        task_list = openml.tasks.list_tasks(tag=tag)
+        self.assertEqual(len(task_list), 0)
+
     def test_get_train_and_test_split_indices(self):
         openml.config.set_cache_directory(self.static_cache_dir)
         task = openml.tasks.get_task(1882)
@@ -62,4 +75,3 @@ def test_get_train_and_test_split_indices(self):
                                 task.get_train_test_split_indices, 10, 0)
         self.assertRaisesRegexp(ValueError, "Repeat 10 not known",
                                 task.get_train_test_split_indices, 0, 10)
-