Feature: download flow list

mfeurer · zardaloop · commit 3f261a9bf82d · 2015-09-04T15:26:17.000+02:00
diff --git a/openml/apiconnector.py b/openml/apiconnector.py
@@ -832,7 +832,7 @@ def get_runs_list(self, task_id=None, flow_id=None, setup_id=None):
         Returns
         -------
         list
-            A list of all runs run IDs for a given ID.
+            A list of all runs for a given ID.
         """
         test = [task_id is None, flow_id is None, setup_id is None]
         if np.nansum(test) != 2:
@@ -843,7 +843,7 @@ def get_runs_list(self, task_id=None, flow_id=None, setup_id=None):
         if task_id is not None:
             call += "?task_id=%d" % task_id
         elif flow_id is not None:
-            call += "?implementation_id=%d" % flow_id
+            call += "?flow_id=%d" % flow_id
         elif setup_id is not None:
             call += "?setup_id=%d" % setup_id
 
@@ -865,8 +865,9 @@ def get_runs_list(self, task_id=None, flow_id=None, setup_id=None):
                 run = {'run_id': int(runs_['oml:run_id']),
                        'task_id': int(runs_['oml:task_id']),
                        'setup_id': int(runs_['oml:setup_id']),
-                       'implementation_id': int(runs_['oml:implementation_id']),
-                       'uploader': int(runs_['oml:uploader'])}
+                       'flow_id': int(runs_['oml:flow_id']),
+                       'uploader': int(runs_['oml:uploader']),
+                       'error_message': runs_['oml:error_message']}
 
                 runs.append(run)
             runs.sort(key=lambda t: t['run_id'])
@@ -956,10 +957,46 @@ def _create_run_from_xml(self, xml):
 
         return OpenMLRun(
             dic[u"oml:run_id"], dic[u"oml:uploader"],
-            dic[u"oml:task_id"], dic[u"oml:implementation_id"],
+            dic[u"oml:task_id"], dic[u"oml:flow_id"],
             dic[u"oml:setup_string"], dic[u'oml:setup_id'],
             tags, datasets, files, evaluations)
 
+    ############################################################################
+    # Flows
+    def get_flow_list(self):
+        """Return a list of all flows on OpenML.
+
+        Returns
+        -------
+        list
+            One dict per flow (keys: id, full_name, name, version,
+            external_version, uploader), sorted by flow id.
+        """
+        return_code, xml_string = self._perform_api_call("/flow/list")
+        flows_dict = xmltodict.parse(xml_string)
+
+        # Minimalistic check if the XML is useful
+        assert flows_dict['oml:flows']['@xmlns:oml'] == \
+            'http://openml.org/openml'
+
+        raw_flows = flows_dict['oml:flows']['oml:flow']
+        if isinstance(raw_flows, dict):
+            # A single <oml:flow> is parsed to a dict, not a one-item list.
+            raw_flows = [raw_flows]
+        assert isinstance(raw_flows, list), type(raw_flows)
+
+        flows = []
+        for flow_ in raw_flows:
+            flow = {'id': int(flow_['oml:id']),
+                    'full_name': flow_['oml:full_name'],
+                    'name': flow_['oml:name'],
+                    'version': flow_['oml:version'],
+                    'external_version': flow_['oml:external_version'],
+                    'uploader': int(flow_['oml:uploader'])}
+            flows.append(flow)
+        flows.sort(key=lambda t: t['id'])
+        return flows
+
     ############################################################################
     # Internal stuff
     def _perform_api_call(self, call, data=None, file_path=None):
diff --git a/source/progress.rst b/source/progress.rst
@@ -32,6 +32,7 @@ API call                                        implemented tested properly test
 /task/delete
 /tasktype/list
 /tasktype/{task_id}
+/flow/list                                      yes
 /flow/tag
 /flow/untag
 /flow/{flow_id}
diff --git a/tests/test_apiconnector.py b/tests/test_apiconnector.py
@@ -226,7 +226,7 @@ def test_download_split(self):
     def test_download_run_list(self):
         def check_run(run):
             self.assertIsInstance(run, dict)
-            self.assertEqual(len(run), 5)
+            self.assertEqual(len(run), 6)
 
         runs = self.connector.get_runs_list(task_id=1)
         # 1759 as the number of supervised classification tasks retrieved
@@ -238,13 +238,13 @@ def check_run(run):
 
         runs = self.connector.get_runs_list(flow_id=1)
         self.assertGreaterEqual(len(runs), 1)
-        for task in runs:
-            check_run(task)
+        for run in runs:
+            check_run(run)
 
         runs = self.connector.get_runs_list(setup_id=1)
         self.assertGreaterEqual(len(runs), 261)
-        for task in runs:
-            check_run(task)
+        for run in runs:
+            check_run(run)
 
     def test_download_run(self):
         run = self.connector.download_run(473350)
@@ -254,6 +254,18 @@ def test_download_run(self):
         self.assertGreaterEqual(len(run.evaluations), 18)
         self.assertEqual(len(run.evaluations['f_measure']), 2)
 
+    # ###########################################################################
+    # Flows
+    def test_download_flow_list(self):
+        """Every listed flow is a dict with exactly six entries."""
+        flows = self.connector.get_flow_list()
+        self.assertGreaterEqual(len(flows), 1448)
+
+        expected_num_fields = 6
+        for flow in flows:
+            self.assertIsInstance(flow, dict)
+            self.assertEqual(len(flow), expected_num_fields)
+
     def test_upload_dataset(self):
 
         dataset = self.connector.download_dataset(3)