openml
diff --git a/openml/__init__.py b/openml/__init__.py
Lines changed: 4 additions & 4 deletions
diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py
Lines changed: 12 additions & 11 deletions
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
Lines changed: 36 additions & 5 deletions
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
Lines changed: 2 additions & 2 deletions
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
Lines changed: 16 additions & 20 deletions
diff --git a/openml/utils/__init__.py b/openml/utils/__init__.py
Lines changed: 0 additions & 3 deletions
@@ -16,17 +16,17 @@
 """
 from . import config
 
-from .datasets import OpenMLDataset
+from .datasets import OpenMLDataset, OpenMLDataFeature
 from . import datasets
 from . import runs
 from . import flows
 from .runs import OpenMLRun
 from .tasks import OpenMLTask, OpenMLSplit
 from .flows import OpenMLFlow
-from .utils import ConditionalImputer
 
 
 __version__ = "0.2.1"
 
-__all__ = ['OpenMLDataset', 'OpenMLRun', 'OpenMLSplit', 'datasets',
-           'OpenMLTask', 'OpenMLFlow', 'config', 'runs', 'flows']
+__all__ = ['OpenMLDataset', 'OpenMLDataFeature', 'OpenMLRun',
+           'OpenMLSplit', 'datasets', 'OpenMLTask', 'OpenMLFlow',
+           'config', 'runs', 'flows']
@@ -4,4 +4,4 @@
 from .data_feature import OpenMLDataFeature
 
 __all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
-           'OpenMLDataset', 'list_datasets']
+           'OpenMLDataset', 'OpenMLDataFeature', 'list_datasets']
@@ -6,9 +6,9 @@ class OpenMLDataFeature(object):
        ----------
        index : int
             The index of this feature
-        name : string
+        name : str
             Name of the feature
-        data_type : string
+        data_type : str
             can be nominal, numeric, string, date (corresponds to arff)
         nominal_values : list(str)
             list of the possible values, in case of nominal attribute
@@ -17,17 +17,18 @@ class OpenMLDataFeature(object):
     LEGAL_DATA_TYPES = ['nominal', 'numeric', 'string', 'date']
 
     def __init__(self, index, name, data_type, nominal_values, number_missing_values):
-        assert type(index) is int, "Index is of wrong datatype"
-        assert type(name) is str, "Name is of wrong datatype"
-        assert type(data_type) is str, "Data_type is of wrong datatype"
-        assert data_type in self.LEGAL_DATA_TYPES, "data type should be in %s" %str(self.LEGAL_DATA_TYPES)
-        if nominal_values is not None:
-            assert type(nominal_values) is list, "Nominal_values is of wrong datatype"
-        assert type(number_missing_values) is int, "number_missing_values is of wrong datatype"
+        if type(index) != int:
+            raise ValueError('Index is of wrong datatype')
+        if data_type not in self.LEGAL_DATA_TYPES:
+            raise ValueError('data type should be in %s, found: %s' %(str(self.LEGAL_DATA_TYPES),data_type))
+        if nominal_values is not None and type(nominal_values) != list:
+            raise ValueError('Nominal_values is of wrong datatype')
+        if type(number_missing_values) != int:
+            raise ValueError('number_missing_values is of wrong datatype')
 
         self.index = index
-        self.name = name
-        self.data_type = data_type
+        self.name = str(name)
+        self.data_type = str(data_type)
         self.nominal_values = nominal_values
         self.number_missing_values = number_missing_values
 
 
@@ -2,6 +2,7 @@
 import io
 import logging
 import os
+import six
 import sys
 
 import arff
@@ -10,7 +11,7 @@
 import scipy.sparse
 import xmltodict
 
-from ..datasets.data_feature import OpenMLDataFeature
+from .data_feature import OpenMLDataFeature
 from ..exceptions import PyOpenMLError
 
 if sys.version_info[0] >= 3:
@@ -65,10 +66,14 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
         self.default_target_attribute = default_target_attribute
         self.row_id_attribute = row_id_attribute
         self.ignore_attributes = None
-        if isinstance(ignore_attribute, str):
+        if isinstance(ignore_attribute, six.string_types):
             self.ignore_attributes = [ignore_attribute]
         elif isinstance(ignore_attribute, list):
             self.ignore_attributes = ignore_attribute
+        elif ignore_attribute is None:
+            pass
+        else:
+            raise ValueError('wrong data type for ignore_attribute. Should be list. ')
         self.version_label = version_label
         self.citation = citation
         self.tag = tag
@@ -88,7 +93,8 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
                                             xmlfeature['oml:data_type'],
                                             None, #todo add nominal values (currently not in database)
                                             int(xmlfeature['oml:number_of_missing_values']))
-                assert idx == feature.index, "Data features not provided in right order"
+                if idx != feature.index:
+                    raise ValueError('Data features not provided in right order')
                 self.features[feature.index] = feature
 
 
@@ -313,15 +319,40 @@ def retrieve_class_labels(self, target_name='class'):
             return None
 
 
-    def get_features_by_type(self, data_type, exclude=None, exclude_ignore_attributes=True, exclude_row_id_attribute=True):
+    def get_features_by_type(self, data_type, exclude=None,
+                             exclude_ignore_attributes=True,
+                             exclude_row_id_attribute=True):
+        '''
+        Returns indices of features of a given type, e.g., all nominal features.
+        Optional parameters exclude features by index or by role (ignore / row id attribute).
+
+        Parameters
+        ----------
+        data_type : str
+            The data type to return (e.g., nominal, numeric, date, string)
+        exclude : list(int)
+            Indices to exclude (and adapt the return values as if these indices
+            are not present)
+        exclude_ignore_attributes : bool
+            Whether to exclude the defined ignore attributes (and adapt the
+            return values as if these indices are not present)
+        exclude_row_id_attribute : bool
+            Whether to exclude the defined row id attribute (and adapt the
+            return values as if this index is not present)
+
+        Returns
+        -------
+        result : list
+            a list of indices that have the specified data type
+        '''
         assert data_type in OpenMLDataFeature.LEGAL_DATA_TYPES, "Illegal feature type requested"
         if self.ignore_attributes is not None:
             assert type(self.ignore_attributes) is list, "ignore_attributes should be a list"
         if self.row_id_attribute is not None:
             assert type(self.row_id_attribute) is str, "row id attribute should be a str"
         if exclude is not None:
             assert type(exclude) is list, "Exclude should be a list"
-            assert all(isinstance(elem, str) for elem in exclude), "Exclude should be a list of strings"
+            # assert all(isinstance(elem, str) for elem in exclude), "Exclude should be a list of strings"
         to_exclude = []
         if exclude is not None:
             to_exclude.extend(exclude)
 
@@ -66,10 +66,10 @@ def list_flows(offset=None, size=None, tag=None):
     if tag is not None:
         api_call += "/tag/%s" % tag
 
-    return _list_datasets(api_call)
+    return _list_flows(api_call)
 
 
-def _list_datasets(api_call):
+def _list_flows(api_call):
     # TODO add proper error handling here!
     xml_string = _perform_api_call(api_call)
     flows_dict = xmltodict.parse(xml_string)
 
@@ -7,7 +7,7 @@
 import openml
 from sklearn.model_selection._search import BaseSearchCV
 
-from build.lib.openml.exceptions import PyOpenMLError
+from ..exceptions import PyOpenMLError
 from .. import config
 from ..flows import sklearn_to_flow, get_flow
 from ..setups import setup_exists
@@ -68,12 +68,7 @@ def run_task(task, model):
 
     # execute the run
     run = OpenMLRun(task_id=task.task_id, flow_id=None, dataset_id=dataset.dataset_id, model=model)
-
-    try:
-        run.data_content, run.trace_content = _run_task_get_arffcontent(model, task, class_labels)
-    except PyOpenMLError as message:
-        run.error_message = str(message)
-        warnings.warn("Run terminated with error: %s" %run.error_message)
+    run.data_content, run.trace_content = _run_task_get_arffcontent(model, task, class_labels)
 
     if flow_id < 0:
         flow.publish()
@@ -110,8 +105,10 @@ def _run_exists(task_id, setup_id):
 
 
 
-def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label, predicted_probabilities, class_labels, model_classes_mapping):
-    """Complicated util function that turns probability estimates of a classifier for a given instance into the right arff format to upload to openml.
+def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
+                       predicted_probabilities, class_labels, model_classes_mapping):
+    """Util function that turns probability estimates of a classifier for a given
+        instance into the right arff format to upload to openml.
 
         Parameters
         ----------
@@ -126,6 +123,9 @@ def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
         predicted_probabilities : array (size=num_classes)
             probabilities per class
         class_labels : array (size=num_classes)
+        model_classes_mapping : list
+            A list of classes the model produced.
+            Obtained by BaseEstimator.classes_
 
         Returns
         -------
@@ -162,17 +162,13 @@ def _run_task_get_arffcontent(model, task, class_labels):
             testX = X[test_indices]
             testY = Y[test_indices]
 
-            try:
-                model.fit(trainX, trainY)
-
-                if isinstance(model, BaseSearchCV):
-                    _add_results_to_arfftrace(arff_tracecontent, fold_no, model, rep_no)
-                    model_classes = model.best_estimator_.classes_
-                else:
-                    model_classes = model.classes_
-            except AttributeError as e:
-                # typically happens when training a regressor on classification task
-                raise PyOpenMLError(str(e))
+            model.fit(trainX, trainY)
+
+            if isinstance(model, BaseSearchCV):
+                _add_results_to_arfftrace(arff_tracecontent, fold_no, model, rep_no)
+                model_classes = model.best_estimator_.classes_
+            else:
+                model_classes = model.classes_
 
             ProbaY = model.predict_proba(testX)
             PredY = model.predict(testX)