Merge branch 'develop' into improve_error_logging

mfeurer · web-flow · commit 200b56c8b4f5 · 2017-11-29T09:53:43.000+01:00
diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py
@@ -16,11 +16,13 @@ class OpenMLDataFeature(object):
        """
     LEGAL_DATA_TYPES = ['nominal', 'numeric', 'string', 'date']
 
-    def __init__(self, index, name, data_type, nominal_values, number_missing_values):
+    def __init__(self, index, name, data_type, nominal_values,
+                 number_missing_values):
         if type(index) != int:
             raise ValueError('Index is of wrong datatype')
         if data_type not in self.LEGAL_DATA_TYPES:
-            raise ValueError('data type should be in %s, found: %s' %(str(self.LEGAL_DATA_TYPES),data_type))
+            raise ValueError('data type should be in %s, found: %s' %
+                             (str(self.LEGAL_DATA_TYPES), data_type))
         if nominal_values is not None and type(nominal_values) != list:
             raise ValueError('Nominal_values is of wrong datatype')
         if type(number_missing_values) != int:
@@ -33,4 +35,7 @@ def __init__(self, index, name, data_type, nominal_values, number_missing_values
         self.number_missing_values = number_missing_values
 
     def __str__(self):
-        return "[%d - %s (%s)]" %(self.index, self.name, self.data_type)
+        return "[%d - %s (%s)]" % (self.index, self.name, self.data_type)
+
+    def _repr_pretty_(self, pp, cycle):
+        pp.text(str(self))
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
@@ -373,13 +373,17 @@ def get_features_by_type(self, data_type, exclude=None,
         result : list
             a list of indices that have the specified data type
         '''
-        assert data_type in OpenMLDataFeature.LEGAL_DATA_TYPES, "Illegal feature type requested"
+        if data_type not in OpenMLDataFeature.LEGAL_DATA_TYPES:
+            raise TypeError("Illegal feature type requested")
         if self.ignore_attributes is not None:
-            assert type(self.ignore_attributes) is list, "ignore_attributes should be a list"
+            if not isinstance(self.ignore_attributes, list):
+                raise TypeError("ignore_attributes should be a list")
         if self.row_id_attribute is not None:
-            assert type(self.row_id_attribute) is str, "row id attribute should be a str"
+            if not isinstance(self.row_id_attribute, six.string_types):
+                raise TypeError("row id attribute should be a str")
         if exclude is not None:
-            assert type(exclude) is list, "Exclude should be a list"
+            if not isinstance(exclude, list):
+                raise TypeError("Exclude should be a list")
             # assert all(isinstance(elem, str) for elem in exclude), "Exclude should be a list of strings"
         to_exclude = []
         if exclude is not None:
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -142,7 +142,7 @@ def _list_tasks(api_call):
         xml_string = _perform_api_call(api_call)
     except OpenMLServerNoResult:
         return []
-    tasks_dict = xmltodict.parse(xml_string, force_list=('oml:task',))
+    tasks_dict = xmltodict.parse(xml_string, force_list=('oml:task','oml:input'))
     # Minimalistic check if the XML is useful
     if 'oml:tasks' not in tasks_dict:
         raise ValueError('Error in return XML, does not contain "oml:runs": %s'
diff --git a/requirements.txt b/requirements.txt
@@ -6,6 +6,7 @@ xmltodict
 nose
 requests
 scikit-learn>=0.18
+nbconvert
 nbformat
 python-dateutil
-oslo.concurrency
+oslo.concurrency
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
@@ -42,6 +42,12 @@ def test__get_estimation_procedure_list(self):
         self.assertIsInstance(estimation_procedures[0], dict)
         self.assertEqual(estimation_procedures[0]['task_type_id'], 1)
 
+    def test_list_clustering_task(self):
+        # as shown by #383, clustering tasks can give list/dict casting problems
+        openml.config.server = self.production_server
+        openml.tasks.list_tasks(task_type_id=5, size=10)
+        # the expected outcome is that it doesn't crash. No assertions.
+
     def _check_task(self, task):
         self.assertEqual(type(task), dict)
         self.assertGreaterEqual(len(task), 2)
@@ -127,7 +133,6 @@ def assert_and_raise(*args, **kwargs):
             os.path.join(os.getcwd(), "tasks", "1", "tasks.xml")
         ))
 
-
     def test_get_task_with_cache(self):
         openml.config.set_cache_directory(self.static_cache_dir)
         task = openml.tasks.get_task(1)