Fix 136: return the task list as a dictionary keyed by task ID

janvanrijn · janvanrijn · commit 41a4a74ccaf7 · 2016-09-02T14:19:14.000+02:00
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -145,12 +145,15 @@ def _list_tasks(api_call):
                          '"oml:runs"/@xmlns:oml is not '
                          '"http://openml.org/openml": %s'
                          % str(tasks_dict))
+
     try:
-        tasks = []
+        tasks = dict();
         procs = _get_estimation_procedure_list()
         proc_dict = dict((x['id'], x) for x in procs)
         for task_ in tasks_dict['oml:tasks']['oml:task']:
-            task = {'tid': int(task_['oml:task_id']),
+            tid = int(task_['oml:task_id'])
+            task = {'tid': tid,
+                    'ttid': int(task_['oml:task_type_id']),
                     'did': int(task_['oml:did']),
                     'name': task_['oml:name'],
                     'task_type': task_['oml:task_type'],
@@ -170,12 +173,10 @@ def _list_tasks(api_call):
                 if abs(int(quality['#text']) - quality['#text']) < 0.0000001:
                     quality['#text'] = int(quality['#text'])
                 task[quality['@name']] = quality['#text']
-            tasks.append(task)
+            tasks[tid] = task
     except KeyError as e:
         raise KeyError("Invalid xml for task: %s" % e)
 
-    tasks.sort(key=lambda t: t['tid'])
-
     return tasks
 
 
@@ -245,7 +246,7 @@ def _create_task_from_xml(xml):
         estimation_parameters[name] = text
 
     return OpenMLTask(
-        dic["oml:task_id"], dic["oml:task_type"],
+        dic["oml:task_id"], dic['oml:task_type_id'], dic["oml:task_type"],
         inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
         inputs["source_data"]["oml:data_set"]["oml:target_feature"],
         inputs["estimation_procedure"]["oml:estimation_procedure"][
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
@@ -9,8 +9,8 @@
 
 
 class OpenMLTask(object):
-    def __init__(self, task_id, task_type, data_set_id, target_name,
-                 estimation_procedure_type, data_splits_url,
+    def __init__(self, task_id, task_type_id, task_type, data_set_id,
+                 target_name, estimation_procedure_type, data_splits_url,
                  estimation_parameters, evaluation_measure, cost_matrix,
                  class_labels=None):
         self.task_id = int(task_id)
diff --git a/tests/files/tasks/1/task.xml b/tests/files/tasks/1/task.xml
@@ -1,34 +1,35 @@
 <oml:task xmlns:oml="http://openml.org/openml">
 	<oml:task_id>1</oml:task_id>
+  <oml:task_type_id>1</oml:task_type_id>
 	<oml:task_type>Supervised Classification</oml:task_type>
 	      <oml:input name="source_data">
-    <oml:data_set>
-<oml:data_set_id>1</oml:data_set_id>
-<oml:target_feature>class</oml:target_feature>
+    <oml:data_set>
+<oml:data_set_id>1</oml:data_set_id>
+<oml:target_feature>class</oml:target_feature>
 </oml:data_set>  </oml:input>
 	      <oml:input name="estimation_procedure">
-    <oml:estimation_procedure>
-<oml:type>crossvalidation</oml:type>
-<oml:data_splits_url>http://www.openml.org/api_splits/get/1/Task_1_splits.arff</oml:data_splits_url>
-<oml:parameter name="number_repeats">1</oml:parameter>
-<oml:parameter name="number_folds">10</oml:parameter>
-<oml:parameter name="percentage"></oml:parameter>
-<oml:parameter name="stratified_sampling">true</oml:parameter>
+    <oml:estimation_procedure>
+<oml:type>crossvalidation</oml:type>
+<oml:data_splits_url>http://www.openml.org/api_splits/get/1/Task_1_splits.arff</oml:data_splits_url>
+<oml:parameter name="number_repeats">1</oml:parameter>
+<oml:parameter name="number_folds">10</oml:parameter>
+<oml:parameter name="percentage"></oml:parameter>
+<oml:parameter name="stratified_sampling">true</oml:parameter>
 </oml:estimation_procedure>  </oml:input>
 	      <oml:input name="cost_matrix">
     <oml:cost_matrix></oml:cost_matrix>  </oml:input>
 	      <oml:input name="evaluation_measures">
-    <oml:evaluation_measures>
-<oml:evaluation_measure>predictive_accuracy</oml:evaluation_measure>
+    <oml:evaluation_measures>
+<oml:evaluation_measure>predictive_accuracy</oml:evaluation_measure>
 </oml:evaluation_measures>  </oml:input>
 	      <oml:output name="predictions">
-    <oml:predictions>
-<oml:format>ARFF</oml:format>
-<oml:feature name="repeat" type="integer"/>
-<oml:feature name="fold" type="integer"/>
-<oml:feature name="row_id" type="integer"/>
-<oml:feature name="confidence.classname" type="numeric"/>
-<oml:feature name="prediction" type="string"/>
+    <oml:predictions>
+<oml:format>ARFF</oml:format>
+<oml:feature name="repeat" type="integer"/>
+<oml:feature name="fold" type="integer"/>
+<oml:feature name="row_id" type="integer"/>
+<oml:feature name="confidence.classname" type="numeric"/>
+<oml:feature name="prediction" type="string"/>
 </oml:predictions>  </oml:output>
 	    <oml:tag>basic</oml:tag>
     <oml:tag>study_1</oml:tag>
diff --git a/tests/files/tasks/1882/task.xml b/tests/files/tasks/1882/task.xml
@@ -1,34 +1,35 @@
 <oml:task xmlns:oml="http://openml.org/openml">
 	<oml:task_id>1882</oml:task_id>
+  <oml:task_type_id>1</oml:task_type_id>
 	<oml:task_type>Supervised Classification</oml:task_type>
 	      <oml:input name="source_data">
-    <oml:data_set>
-<oml:data_set_id>2</oml:data_set_id>
-<oml:target_feature>class</oml:target_feature>
+    <oml:data_set>
+<oml:data_set_id>2</oml:data_set_id>
+<oml:target_feature>class</oml:target_feature>
 </oml:data_set>  </oml:input>
 	      <oml:input name="estimation_procedure">
-    <oml:estimation_procedure>
-<oml:type>crossvalidation</oml:type>
-<oml:data_splits_url>http://capa.win.tue.nl/api_splits/get/1882/Task_1882_splits.arff</oml:data_splits_url>
-<oml:parameter name="number_repeats">10</oml:parameter>
-<oml:parameter name="number_folds">10</oml:parameter>
-<oml:parameter name="percentage"></oml:parameter>
-<oml:parameter name="stratified_sampling">true</oml:parameter>
+    <oml:estimation_procedure>
+<oml:type>crossvalidation</oml:type>
+<oml:data_splits_url>http://capa.win.tue.nl/api_splits/get/1882/Task_1882_splits.arff</oml:data_splits_url>
+<oml:parameter name="number_repeats">10</oml:parameter>
+<oml:parameter name="number_folds">10</oml:parameter>
+<oml:parameter name="percentage"></oml:parameter>
+<oml:parameter name="stratified_sampling">true</oml:parameter>
 </oml:estimation_procedure>  </oml:input>
 	      <oml:input name="cost_matrix">
     <oml:cost_matrix></oml:cost_matrix>  </oml:input>
 	      <oml:input name="evaluation_measures">
-    <oml:evaluation_measures>
-<oml:evaluation_measure>predictive_accuracy</oml:evaluation_measure>
+    <oml:evaluation_measures>
+<oml:evaluation_measure>predictive_accuracy</oml:evaluation_measure>
 </oml:evaluation_measures>  </oml:input>
 	      <oml:output name="predictions">
-    <oml:predictions>
-<oml:format>ARFF</oml:format>
-<oml:feature name="repeat" type="integer"/>
-<oml:feature name="fold" type="integer"/>
-<oml:feature name="row_id" type="integer"/>
-<oml:feature name="confidence.classname" type="numeric"/>
-<oml:feature name="prediction" type="string"/>
+    <oml:predictions>
+<oml:format>ARFF</oml:format>
+<oml:feature name="repeat" type="integer"/>
+<oml:feature name="fold" type="integer"/>
+<oml:feature name="row_id" type="integer"/>
+<oml:feature name="confidence.classname" type="numeric"/>
+<oml:feature name="prediction" type="string"/>
 </oml:predictions>  </oml:output>
 	    <oml:tag>under100k</oml:tag>
     <oml:tag>under1m</oml:tag>
diff --git a/tests/files/tasks/3/task.xml b/tests/files/tasks/3/task.xml
@@ -1,34 +1,35 @@
 <oml:task xmlns:oml="http://openml.org/openml">
 	<oml:task_id>3</oml:task_id>
+  <oml:task_type_id>1</oml:task_type_id>
 	<oml:task_type>Supervised Classification</oml:task_type>
 	      <oml:input name="source_data">
-    <oml:data_set>
-<oml:data_set_id>3</oml:data_set_id>
-<oml:target_feature>class</oml:target_feature>
+    <oml:data_set>
+<oml:data_set_id>3</oml:data_set_id>
+<oml:target_feature>class</oml:target_feature>
 </oml:data_set>  </oml:input>
 	      <oml:input name="estimation_procedure">
-    <oml:estimation_procedure>
-<oml:type>crossvalidation</oml:type>
-<oml:data_splits_url>http://www.openml.org/api_splits/get/3/Task_3_splits.arff</oml:data_splits_url>
-<oml:parameter name="number_repeats">1</oml:parameter>
-<oml:parameter name="number_folds">10</oml:parameter>
-<oml:parameter name="percentage"></oml:parameter>
-<oml:parameter name="stratified_sampling">true</oml:parameter>
+    <oml:estimation_procedure>
+<oml:type>crossvalidation</oml:type>
+<oml:data_splits_url>http://www.openml.org/api_splits/get/3/Task_3_splits.arff</oml:data_splits_url>
+<oml:parameter name="number_repeats">1</oml:parameter>
+<oml:parameter name="number_folds">10</oml:parameter>
+<oml:parameter name="percentage"></oml:parameter>
+<oml:parameter name="stratified_sampling">true</oml:parameter>
 </oml:estimation_procedure>  </oml:input>
 	      <oml:input name="cost_matrix">
     <oml:cost_matrix></oml:cost_matrix>  </oml:input>
 	      <oml:input name="evaluation_measures">
-    <oml:evaluation_measures>
-<oml:evaluation_measure>predictive_accuracy</oml:evaluation_measure>
+    <oml:evaluation_measures>
+<oml:evaluation_measure>predictive_accuracy</oml:evaluation_measure>
 </oml:evaluation_measures>  </oml:input>
 	      <oml:output name="predictions">
-    <oml:predictions>
-<oml:format>ARFF</oml:format>
-<oml:feature name="repeat" type="integer"/>
-<oml:feature name="fold" type="integer"/>
-<oml:feature name="row_id" type="integer"/>
-<oml:feature name="confidence.classname" type="numeric"/>
-<oml:feature name="prediction" type="string"/>
+    <oml:predictions>
+<oml:format>ARFF</oml:format>
+<oml:feature name="repeat" type="integer"/>
+<oml:feature name="fold" type="integer"/>
+<oml:feature name="row_id" type="integer"/>
+<oml:feature name="confidence.classname" type="numeric"/>
+<oml:feature name="prediction" type="string"/>
 </oml:predictions>  </oml:output>
 	    <oml:tag>basic</oml:tag>
     <oml:tag>mythbusting</oml:tag>
diff --git a/tests/tasks/test_task_functions.py b/tests/tasks/test_task_functions.py
@@ -53,37 +53,29 @@ def _check_task(self, task):
     def test_list_tasks_by_type(self):
         tasks = openml.tasks.list_tasks(task_type_id=3)
         self.assertGreaterEqual(len(tasks), 300)
-        for task in tasks:
-            self._check_task(task)
+        for tid in tasks:
+            self._check_task(tasks[tid])
 
     def test_list_tasks_by_tag(self):
         tasks = openml.tasks.list_tasks(tag='basic')
         self.assertGreaterEqual(len(tasks), 57)
-        for task in tasks:
-            self._check_task(task)
+        for tid in tasks:
+            self._check_task(tasks[tid])
 
     def test_list_tasks(self):
         tasks = openml.tasks.list_tasks()
         self.assertGreaterEqual(len(tasks), 2000)
-        for task in tasks:
-            self._check_task(task)
+        for tid in tasks:
+            self._check_task(tasks[tid])
 
     def test_list_tasks_paginate(self):
         size = 10
         max = 100
         for i in range(0, max, size):
             tasks = openml.tasks.list_tasks(offset=i, size=size)
             self.assertGreaterEqual(size, len(tasks))
-            for task in tasks:
-                self.assertEqual(type(task), dict)
-                self.assertGreaterEqual(len(task), 4)
-                self.assertIn('tid', task)
-                self.assertIsInstance(task['tid'], int)
-                self.assertIn('did', task)
-                self.assertIsInstance(task['did'], int)
-                self.assertIn('status', task)
-                self.assertTrue(is_string(task['status']))
-                self.assertIn(task['status'], ['in_preparation', 'active', 'deactivated'])
+            for tid in tasks:
+                self._check_task(tasks[tid])
 
     def test_list_tasks_per_type_paginate(self):
         size = 10
@@ -93,16 +85,8 @@ def test_list_tasks_per_type_paginate(self):
             for i in range(0, max, size):
                 tasks = openml.tasks.list_tasks(task_type_id=j, offset=i, size=size)
                 self.assertGreaterEqual(size, len(tasks))
-                for task in tasks:
-                    self.assertEqual(type(task), dict)
-                    self.assertGreaterEqual(len(task), 4)
-                    self.assertIn('tid', task)
-                    self.assertIsInstance(task['tid'], int)
-                    self.assertIn('did', task)
-                    self.assertIsInstance(task['did'], int)
-                    self.assertIn('status', task)
-                    self.assertTrue(is_string(task['status']))
-                    self.assertIn(task['status'], ['in_preparation', 'active', 'deactivated'])
+                for tid in tasks:
+                    self._check_task(tasks[tid])
 
     def test__get_task(self):
         openml.config.set_cache_directory(self.static_cache_dir)