Merged the list-dataset functions into a single function with optional filter arguments.

janvanrijn · janvanrijn · commit 2c5c21411d97 · 2016-09-01T18:46:49.000+02:00
Did the same for the list-task functions.
diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py
@@ -1,8 +1,6 @@
-from .functions import (list_datasets, list_datasets_by_tag,
-                        check_datasets_active, get_datasets, get_dataset,
-                        list_datasets_paginate)
+from .functions import (list_datasets, check_datasets_active,
+                        get_datasets, get_dataset)
 from .dataset import OpenMLDataset
 
 __all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
-           'OpenMLDataset', 'list_datasets', 'list_datasets_by_tag',
-           'list_datasets_paginate']
+           'OpenMLDataset', 'list_datasets']
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -42,7 +42,7 @@ def _list_cached_datasets():
             dataset_directory_content = os.listdir(directory_name)
 
             if "dataset.arff" in dataset_directory_content and \
-                    "description.xml" in dataset_directory_content:
+                            "description.xml" in dataset_directory_content:
                 if dataset_id not in datasets:
                     datasets.append(dataset_id)
 
@@ -111,74 +111,51 @@ def _get_cached_dataset_arff(dataset_id):
                                "cached" % dataset_id)
 
 
-def list_datasets():
+def list_datasets(offset=None, size=None, tag=None):
     """Return a list of all dataset which are on OpenML.
 
-    Returns
-    -------
-    datasets : list of dicts
-        A list of all datasets.
-
-        Every dataset is represented by a dictionary containing
-        the following information:
-        - dataset id
-        - status
-
-        If qualities are calculated for the dataset, some of
-        these are also returned.
-    """
-    return _list_datasets("data/list")
-
-
-def list_datasets_paginate(offset,size):
-    """Return a partial list (of given size) dataset which are on OpenML, starting with offset.
-
     Parameters
     ----------
     offset : int
         the number of datasets to skip, starting from the first
     size : int
         the maximum datasets of tasks to show
+    tag : str
+        the tag to include
 
     Returns
     -------
     datasets : list of dicts
-        A partial list of datasets.
+        A list of datasets having the given tag (if applicable).
 
         Every dataset is represented by a dictionary containing
         the following information:
         - dataset id
         - status
-
+        
         If qualities are calculated for the dataset, some of
         these are also returned.
     """
-    try:
-        offset = int(offset)
-    except:
-        raise ValueError("Offset is neither an Integer nor can be "
-                         "cast to an Integer.")
-    try:
-        limit = int(size)
-    except:
-        raise ValueError("Size is neither an Integer nor can be "
-                         "cast to an Integer.")
-    return _list_datasets("data/list/offset/%d/limit/%d" % (offset, size))
-
-
-def list_datasets_by_tag(tag):
-    """Return all datasets having the given tag.
+    api_call = "data/list"
+    if offset is not None:
+        try:
+            offset = int(offset)
+            api_call += "/offset/%d" % offset
+        except:
+            raise ValueError("Offset is neither an Integer nor can be "
+                             "cast to an Integer.")
 
-    Returns
-    -------
-    datasets : list of dicts
-        A list of all datasets having the given tag. Every dataset is
-        represented by a dictionary containing the following information:
-        dataset id, and status. If qualities are calculated for the dataset,
-        some of these are also returned.
+    if size is not None:
+        try:
+            size = int(size)
+            api_call += "/limit/%d" % size
+        except:
+            raise ValueError("Size is neither an Integer nor can be "
+                             "cast to an Integer.")
+    if tag is not None:
+        api_call += "/tag/%s" % tag
 
-    """
-    return _list_datasets("data/list/%s" % tag)
+    return _list_datasets(api_call)
 
 
 def _list_datasets(api_call):
@@ -190,7 +167,7 @@ def _list_datasets(api_call):
     assert type(datasets_dict['oml:data']['oml:dataset']) == list, \
         type(datasets_dict['oml:data'])
     assert datasets_dict['oml:data']['@xmlns:oml'] == \
-        'http://openml.org/openml'
+           'http://openml.org/openml'
 
     datasets = []
     for dataset_ in datasets_dict['oml:data']['oml:dataset']:
diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py
@@ -1,9 +1,5 @@
 from .task import OpenMLTask
 from .split import OpenMLSplit
-from .functions import (get_task, list_tasks, list_tasks_by_type,
-                        list_tasks_by_tag, list_tasks_paginate,
-                        list_tasks_by_type_paginate)
+from .functions import (get_task, list_tasks)
 
-__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'list_tasks_by_type',
-           'list_tasks_by_tag', 'list_tasks_paginate', 'OpenMLSplit',
-           'list_tasks_by_type_paginate']
+__all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'OpenMLSplit']
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -89,131 +89,58 @@ def _get_estimation_procedure_list():
     return procs
 
 
-def list_tasks_by_type(task_type_id):
-    """Return a list of all tasks for a given tasks type which are on OpenML.
+def list_tasks(task_type_id=None, offset=None, size=None, tag=None):
+    """Return a number of tasks having the given tag and task_type_id
 
     Parameters
     ----------
     task_type_id : int
         ID of the task type as detailed
         `here <http://www.openml.org/search?type=task_type>`_.
-
-    Returns
-    -------
-    list
-        A list of all tasks of the given task type. Every task is represented by
-        a dictionary containing the following information: task id,
-        dataset id, task_type and status. If qualities are calculated for
-        the associated dataset, some of these are also returned.
-    """
-    try:
-        task_type_id = int(task_type_id)
-    except:
-        raise ValueError("Task Type ID is neither an Integer nor can be "
-                         "cast to an Integer.")
-    return _list_tasks("task/list/type/%d" % task_type_id)
-
-
-def list_tasks_paginate(offset,size):
-    """Return a partial list (of given size) tasks for a given tasks type, starting with offset.
-
-    Parameters
-    ----------
     offset : int
         the number of tasks to skip, starting from the first
     size : int
         the maximum number of tasks to show
-
-     Returns
-    -------
-    list
-        A partial list of tasks of the task type. Every task is represented by a
-        dictionary containing the following information: task id,
-        dataset id, task_type and status. If qualities are calculated for
-        the associated dataset, some of these are also returned.
-    """
-    try:
-        offset = int(offset)
-    except:
-        raise ValueError("Offset is neither an Integer nor can be "
-                         "cast to an Integer.")
-    try:
-        size = int(size)
-    except:
-        raise ValueError("Size is neither an Integer nor can be "
-                         "cast to an Integer.")
-    return _list_tasks("task/list/offset/%d/limit/%d" % (offset, size))
-
-
-def list_tasks_by_type_paginate(task_type_id,offset,size):
-    """Return a partial list (of given size) tasks, starting with offset.
-
-    Parameters
-    ----------
-    task_type_id : int
-        ID of the task type as detailed
-        `here <http://www.openml.org/search?type=task_type>`_.
-    offset : int
-        the number of tasks to skip, starting from the first
-    size : int
-        the maximum number of tasks to show
-
-     Returns
-    -------
-    list
-        A partial list of tasks. Every task is represented by a
-        dictionary containing the following information: task id,
-        dataset id, task_type and status. If qualities are calculated for
-        the associated dataset, some of these are also returned.
-    """
-    try:
-        task_type_id = int(task_type_id)
-    except:
-        raise ValueError("Task Type ID is neither an Integer nor can be "
-                         "cast to an Integer.")
-    try:
-        offset = int(offset)
-    except:
-        raise ValueError("Offset is neither an Integer nor can be "
-                         "cast to an Integer.")
-    try:
-        size = int(size)
-    except:
-        raise ValueError("Size is neither an Integer nor can be "
-                         "cast to an Integer.")
-    return _list_tasks("task/list/type/%d/offset/%d/limit/%d" % (task_type_id,offset, size))
-
-
-def list_tasks_by_tag(tag):
-    """Return all tasks having the given tag
-
-    Parameters
-    ----------
     tag : str
+        the tag to include
 
     Returns
     -------
     list
-        A list of all tasks having a give tag. Every task is represented by
-        a dictionary containing the following information: task id,
-        dataset id, task_type and status. If qualities are calculated for
-        the associated dataset, some of these are also returned.
+        A list of all tasks having the given task_type_id and the given tag.
+        Every task is represented by a dictionary containing the following
+        information: task id, dataset id, task_type and status. If qualities
+        are calculated for the associated dataset, some of these are also
+        returned.
     """
-    return _list_tasks("task/list/tag/%s" % tag)
-
+    api_call = "task/list"
+    if task_type_id is not None:
+        try:
+            task_type_id = int(task_type_id)
+            api_call += "/type/%d" % task_type_id
+        except:
+            raise ValueError("Task_type_id is neither an Integer nor can be "
+                             "cast to an Integer.")
 
-def list_tasks():
-    """Return a list of all tasks which are on OpenML.
+    if offset is not None:
+        try:
+            offset = int(offset)
+            api_call += "/offset/%d" % offset
+        except:
+            raise ValueError("Offset is neither an Integer nor can be "
+                             "cast to an Integer.")
 
-    Returns
-    -------
-    list
-        A list of all tasks. Every task is represented by a
-        dictionary containing the following information: task id,
-        dataset id, task_type and status. If qualities are calculated for
-        the associated dataset, some of these are also returned.
-    """
-    return _list_tasks('task/list')
+    if size is not None:
+        try:
+            size = int(size)
+            api_call += "/limit/%d" % size
+        except:
+            raise ValueError("Size is neither an Integer nor can be "
+                             "cast to an Integer.")
+    if tag is not None:
+        api_call += "/tag/%s" % tag
+
+    return _list_tasks(api_call)
 
 
 def _list_tasks(api_call):
diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py
@@ -88,7 +88,7 @@ def test_list_datasets(self):
                                               'deactivated'])
 
     def test_list_datasets_by_tag(self):
-        datasets = openml.datasets.list_datasets_by_tag('uci')
+        datasets = openml.datasets.list_datasets(tag='uci')
         self.assertGreaterEqual(len(datasets), 5)
         for dataset in datasets:
             self.assertEqual(type(dataset), dict)
@@ -104,7 +104,7 @@ def test_list_datasets_paginate(self):
         size = 10
         max = 100
         for i in range(0, max, size):
-            data = openml.datasets.list_datasets_paginate(i, size)
+            data = openml.datasets.list_datasets(offset=i, size=size)
             self.assertGreaterEqual(size, len(data))
             for dataset in data:
                 self.assertEqual(type(dataset), dict)
diff --git a/tests/tasks/test_task_functions.py b/tests/tasks/test_task_functions.py
@@ -51,13 +51,13 @@ def _check_task(self, task):
                       ['in_preparation', 'active', 'deactivated'])
 
     def test_list_tasks_by_type(self):
-        tasks = openml.tasks.list_tasks_by_type(task_type_id=3)
+        tasks = openml.tasks.list_tasks(task_type_id=3)
         self.assertGreaterEqual(len(tasks), 300)
         for task in tasks:
             self._check_task(task)
 
     def test_list_tasks_by_tag(self):
-        tasks = openml.tasks.list_tasks_by_tag('basic')
+        tasks = openml.tasks.list_tasks(tag='basic')
         self.assertGreaterEqual(len(tasks), 57)
         for task in tasks:
             self._check_task(task)
@@ -72,7 +72,7 @@ def test_list_tasks_paginate(self):
         size = 10
         max = 100
         for i in range(0, max, size):
-            tasks = openml.tasks.list_tasks_paginate(i, size)
+            tasks = openml.tasks.list_tasks(offset=i, size=size)
             self.assertGreaterEqual(size, len(tasks))
             for task in tasks:
                 self.assertEqual(type(task), dict)
@@ -91,7 +91,7 @@ def test_list_tasks_per_type_paginate(self):
         task_types = 5
         for j in range(1,task_types):
             for i in range(0, max, size):
-                tasks = openml.tasks.list_tasks_by_type_paginate(j, i, size)
+                tasks = openml.tasks.list_tasks(task_type_id=j, offset=i, size=size)
                 self.assertGreaterEqual(size, len(tasks))
                 for task in tasks:
                     self.assertEqual(type(task), dict)