ADD /task/list/type/{id} api call

mfeurer · mfeurer · commit b430c736759c · 2016-03-17T18:55:35.000+01:00
diff --git a/doc/progress.rst b/doc/progress.rst
@@ -16,7 +16,7 @@ API call                                        implemented tested properly test
 /data/features/{id}                             yes         yes
 /data/qualities/{id}                            yes         yes
 /data/list/                                     yes         yes
-/data/list/tag/{tag}
+/data/list/tag/{tag}                            yes         yes
 /data/upload/                                   yes         yes
 /data/tag
 /data/untag
diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py
@@ -1,5 +1,6 @@
 from .task import OpenMLTask
 from .split import OpenMLSplit
-from .task_functions import get_task, list_tasks
+from .task_functions import get_task, list_tasks, list_tasks_by_type
 
-__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'OpenMLSplit']
+__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'list_tasks_by_type',
+           'OpenMLSplit']
diff --git a/openml/tasks/task_functions.py b/openml/tasks/task_functions.py
@@ -81,20 +81,20 @@ def get_estimation_procedure_list():
     return procs
 
 
-def list_tasks(task_type_id=1):
-    """Return a list of all tasks which are on OpenML.
+def list_tasks_by_type(task_type_id):
+    """Return a list of all tasks for a given task type which are on OpenML.
 
     Parameters
     ----------
     task_type_id : int
         ID of the task type as detailed
-        `here <http://openml.org/api/?f=openml.task.types>`_.
+        `here <http://www.openml.org/search?type=task_type>`_.
 
     Returns
     -------
-    tasks : list
-        A list of all tasks. Every task is represented by a
-        dictionary containing the following information: task id,
+    list
+        A list of all tasks of the given task type. Every task is represented by
+        a dictionary containing the following information: task id,
         dataset id, task_type and status. If qualities are calculated for
         the associated dataset, some of these are also returned.
     """
@@ -103,9 +103,25 @@ def list_tasks(task_type_id=1):
     except:
         raise ValueError("Task Type ID is neither an Integer nor can be "
                          "cast to an Integer.")
+    return _list_tasks("task/list/type/%d" % task_type_id)
 
-    return_code, xml_string = _perform_api_call(
-        "task/list/type/%d" % task_type_id)
+
+def list_tasks():
+    """Return a list of all tasks which are on OpenML.
+
+    Returns
+    -------
+    list
+        A list of all tasks. Every task is represented by a
+        dictionary containing the following information: task id,
+        dataset id, task_type and status. If qualities are calculated for
+        the associated dataset, some of these are also returned.
+    """
+    return _list_tasks('task/list')
+
+
+def _list_tasks(api_call):
+    return_code, xml_string = _perform_api_call(api_call)
     tasks_dict = xmltodict.parse(xml_string)
     # Minimalistic check if the XML is useful
     assert tasks_dict['oml:tasks']['@xmlns:oml'] == \
@@ -127,7 +143,8 @@ def list_tasks(task_type_id=1):
             if input['@name'] == 'estimation_procedure':
                 task[input['@name']] = proc_dict[int(input['#text'])]['name']
             else:
-                task[input['@name']] = input['#text']
+                value = input.get('#text')
+                task[input['@name']] = value
 
         task[input['@name']] = input['#text']
 
diff --git a/tests/test_task.py b/tests/test_task.py
@@ -7,29 +7,30 @@
 
 
 class TestTask(TestBase):
+    def _check_task(self, task):
+        self.assertEqual(type(task), dict)
+        self.assertGreaterEqual(len(task), 2)
+        self.assertIn('did', task)
+        self.assertIsInstance(task['did'], int)
+        self.assertIn('status', task)
+        self.assertTrue(is_string(task['status']))
+        self.assertIn(task['status'],
+                      ['in_preparation', 'active', 'deactivated'])
+
     def test_list_tasks(self):
-        # We can only perform a smoke test here because we test on dynamic
-        # data from the internet...
-        def check_task(task):
-            self.assertEqual(type(task), dict)
-            self.assertGreaterEqual(len(task), 2)
-            self.assertIn('did', task)
-            self.assertIsInstance(task['did'], int)
-            self.assertIn('status', task)
-            self.assertTrue(is_string(task['status']))
-            self.assertIn(task['status'],
-                          ['in_preparation', 'active', 'deactivated'])
+        tasks = openml.tasks.list_tasks()
+        self.assertGreaterEqual(len(tasks), 2000)
+        for task in tasks:
+            self._check_task(task)
 
-        # use a small task type as we cant limit tasks.
-        # TODO inspect the tasks maybe?
-        tasks = openml.tasks.list_tasks(task_type_id=3)
+    def test_list_tasks_by_type(self):
+        tasks = openml.tasks.list_tasks_by_type(task_type_id=3)
         self.assertGreaterEqual(len(tasks), 300)
         for task in tasks:
-            check_task(task)
+            self._check_task(task)
 
     def test_get_task(self):
         task = openml.tasks.get_task(1)
-        print(task)
         self.assertTrue(os.path.exists(
             os.path.join(os.getcwd(), "tasks", "1", "task.xml")))
         self.assertTrue(os.path.exists(