Skip to content

Commit 813daeb

Browse files
ArlindKadra authored and mfeurer committed
[WIP] Task upload (#607)
* Initial implementation * Further progress on task upload * changes to pr * Code refactor, implementation changed * pep8 fix * Fix * Update for the errors only on travis-ci * Fix for failing builds * Fixes in accordance with openml * Refactor and changes * Changes considering the suggestions from Matthias * Updating clustering tasks to bypass the issue * Refactoring and bug fixes * Flake fix and considering another task for classification * Changing the ClassificationTask to the test server * Testing simple solution * Addressing the comments from Matthias * Fixing unused imports * Addressing #656 * Addressing #657 * Addressing the comments from Matthias, refactoring the task classes * Update pr * Trying fix for task upload * Fix bug introduced from previous changes on perform_api_call, increase max_wait_time for task upload * Update code, increase max time for task upload * Increasing wait time for task upload * Further increase in max wait time * Added create_task function, changed the implementation for the unit tests regarding task upload * Overcoming different feature types bug * Type annotations errors * Fixing pep8 spacing * Update 1 * Update 2 * Fixing type annotations * Another try at fixing type annotations for tasks * Fixing bug with unit tests of clustering tasks, changing order for type annotations * Fix for type annotations * Update for type annotations and failing clustering tasks * Further refactoring * Important refactor * Pep8 fix * Trying change * Trying fix for overload of setUp function * Update induced bug * Trying solution for unittest inheritance * Partially addressing the comments from Matthias, pep8 fix * Addressing the comments from Matthias and a first try at the pep8 run issue * Fixing pep8 errors * Enforcing pep8 * Another try at pep8 solution * Pep8 Fix * Address type annotation warnings * pep8 fix * addressing type annotations v2 * Addressing the comments from Matthias * Minor refactor * Testing 2 possible cases of uploading a 
clustering task
1 parent f656062 commit 813daeb

15 files changed

Lines changed: 734 additions & 174 deletions

openml/datasets/dataset.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -669,15 +669,17 @@ def publish(self):
669669
path = os.path.abspath(self.data_file)
670670
if os.path.exists(path):
671671
try:
672-
# check if arff is valid
673-
decoder = arff.ArffDecoder()
672+
674673
with io.open(path, encoding='utf8') as fh:
674+
# check if arff is valid
675+
decoder = arff.ArffDecoder()
675676
decoder.decode(fh, encode_nominal=True)
676677
except arff.ArffException:
677678
raise ValueError("The file you have provided is not "
678679
"a valid arff file.")
679680

680-
file_elements['dataset'] = open(path, 'rb')
681+
with open(path, 'rb') as fp:
682+
file_elements['dataset'] = fp.read()
681683
else:
682684
if self.url is None:
683685
raise ValueError("No url/path to the data file was given")

openml/extensions/sklearn/extension.py

Lines changed: 30 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1264,29 +1264,36 @@ def _prediction_to_probabilities(y: np.ndarray, classes: List[Any]) -> np.ndarra
12641264
try:
12651265
proba_y = model_copy.predict_proba(X_test)
12661266
except AttributeError:
1267-
proba_y = _prediction_to_probabilities(pred_y, list(task.class_labels))
1268-
1269-
if proba_y.shape[1] != len(task.class_labels):
1270-
# Remap the probabilities in case there was a class missing at training time
1271-
# By default, the classification targets are mapped to be zero-based indices to the
1272-
# actual classes. Therefore, the model_classes contain the correct indices to the
1273-
# correct probability array. Example:
1274-
# classes in the dataset: 0, 1, 2, 3, 4, 5
1275-
# classes in the training set: 0, 1, 2, 4, 5
1276-
# then we need to add a column full of zeros into the probabilities for class 3
1277-
# (because the rest of the library expects that the probabilities are ordered the
1278-
# same way as the classes are ordered).
1279-
proba_y_new = np.zeros((proba_y.shape[0], len(task.class_labels)))
1280-
for idx, model_class in enumerate(model_classes):
1281-
proba_y_new[:, model_class] = proba_y[:, idx]
1282-
proba_y = proba_y_new
1283-
1284-
if proba_y.shape[1] != len(task.class_labels):
1285-
message = "Estimator only predicted for {}/{} classes!".format(
1286-
proba_y.shape[1], len(task.class_labels),
1287-
)
1288-
warnings.warn(message)
1289-
openml.config.logger.warn(message)
1267+
if task.class_labels is not None:
1268+
proba_y = _prediction_to_probabilities(pred_y, list(task.class_labels))
1269+
else:
1270+
raise ValueError('The task has no class labels')
1271+
1272+
if task.class_labels is not None:
1273+
if proba_y.shape[1] != len(task.class_labels):
1274+
# Remap the probabilities in case there was a class missing
1275+
# at training time. By default, the classification targets
1276+
# are mapped to be zero-based indices to the actual classes.
1277+
# Therefore, the model_classes contain the correct indices to
1278+
# the correct probability array. Example:
1279+
# classes in the dataset: 0, 1, 2, 3, 4, 5
1280+
# classes in the training set: 0, 1, 2, 4, 5
1281+
# then we need to add a column full of zeros into the probabilities
1282+
# for class 3 because the rest of the library expects that the
1283+
# probabilities are ordered the same way as the classes are ordered).
1284+
proba_y_new = np.zeros((proba_y.shape[0], len(task.class_labels)))
1285+
for idx, model_class in enumerate(model_classes):
1286+
proba_y_new[:, model_class] = proba_y[:, idx]
1287+
proba_y = proba_y_new
1288+
1289+
if proba_y.shape[1] != len(task.class_labels):
1290+
message = "Estimator only predicted for {}/{} classes!".format(
1291+
proba_y.shape[1], len(task.class_labels),
1292+
)
1293+
warnings.warn(message)
1294+
openml.config.logger.warn(message)
1295+
else:
1296+
raise ValueError('The task has no class labels')
12901297

12911298
elif isinstance(task, OpenMLRegressionTask):
12921299
proba_y = None

openml/runs/functions.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,9 @@ def run_flow_on_task(
158158
if flow_tags is not None and not isinstance(flow_tags, list):
159159
raise ValueError("flow_tags should be a list")
160160

161+
if task.task_id is None:
162+
raise ValueError("The task should be published at OpenML")
163+
161164
# TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
162165
# Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
163166
if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):
@@ -452,11 +455,14 @@ def _calculate_local_measure(sklearn_fn, openml_name):
452455
for i, tst_idx in enumerate(test_indices):
453456

454457
arff_line = [rep_no, fold_no, sample_no, tst_idx] # type: List[Any]
455-
for j, class_label in enumerate(task.class_labels):
456-
arff_line.append(proba_y[i][j])
458+
if task.class_labels is not None:
459+
for j, class_label in enumerate(task.class_labels):
460+
arff_line.append(proba_y[i][j])
457461

458-
arff_line.append(task.class_labels[pred_y[i]])
459-
arff_line.append(task.class_labels[test_y[i]])
462+
arff_line.append(task.class_labels[pred_y[i]])
463+
arff_line.append(task.class_labels[test_y[i]])
464+
else:
465+
raise ValueError('The task has no class labels')
460466

461467
arff_datacontent.append(arff_line)
462468

openml/runs/run.py

Lines changed: 33 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -216,30 +216,45 @@ def _generate_arff_dict(self) -> 'OrderedDict[str, Any]':
216216
'openml_task_{}_predictions'.format(task.task_id)
217217

218218
if isinstance(task, OpenMLLearningCurveTask):
219-
class_labels = task.class_labels # type: ignore
220-
arff_dict['attributes'] = [('repeat', 'NUMERIC'),
221-
('fold', 'NUMERIC'),
222-
('sample', 'NUMERIC'),
223-
('row_id', 'NUMERIC')] + \
224-
[('confidence.' + class_labels[i],
225-
'NUMERIC') for i in
226-
range(len(class_labels))] + \
227-
[('prediction', class_labels),
228-
('correct', class_labels)]
219+
class_labels = task.class_labels
220+
instance_specifications = [
221+
('repeat', 'NUMERIC'),
222+
('fold', 'NUMERIC'),
223+
('sample', 'NUMERIC'),
224+
('row_id', 'NUMERIC')
225+
]
226+
227+
arff_dict['attributes'] = instance_specifications
228+
if class_labels is not None:
229+
arff_dict['attributes'] = arff_dict['attributes'] + \
230+
[('confidence.' + class_labels[i],
231+
'NUMERIC')
232+
for i in range(len(class_labels))] + \
233+
[('prediction', class_labels),
234+
('correct', class_labels)]
235+
else:
236+
raise ValueError('The task has no class labels')
237+
229238
elif isinstance(task, OpenMLClassificationTask):
230239
class_labels = task.class_labels
231240
instance_specifications = [('repeat', 'NUMERIC'),
232241
('fold', 'NUMERIC'),
233242
('sample', 'NUMERIC'), # Legacy
234243
('row_id', 'NUMERIC')]
235-
prediction_confidences = [('confidence.' + class_labels[i],
236-
'NUMERIC')
237-
for i in range(len(class_labels))]
238-
prediction_and_true = [('prediction', class_labels),
239-
('correct', class_labels)]
240-
arff_dict['attributes'] = (instance_specifications
241-
+ prediction_confidences
242-
+ prediction_and_true)
244+
245+
arff_dict['attributes'] = instance_specifications
246+
if class_labels is not None:
247+
prediction_confidences = [('confidence.' + class_labels[i],
248+
'NUMERIC')
249+
for i in range(len(class_labels))]
250+
prediction_and_true = [('prediction', class_labels),
251+
('correct', class_labels)]
252+
arff_dict['attributes'] = arff_dict['attributes'] + \
253+
prediction_confidences + \
254+
prediction_and_true
255+
else:
256+
raise ValueError('The task has no class labels')
257+
243258
elif isinstance(task, OpenMLRegressionTask):
244259
arff_dict['attributes'] = [('repeat', 'NUMERIC'),
245260
('fold', 'NUMERIC'),

openml/tasks/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,12 @@
88
TaskTypeEnum,
99
)
1010
from .split import OpenMLSplit
11-
from .functions import (get_task, get_tasks, list_tasks)
11+
from .functions import (
12+
create_task,
13+
get_task,
14+
get_tasks,
15+
list_tasks,
16+
)
1217

1318
__all__ = [
1419
'OpenMLTask',
@@ -17,6 +22,7 @@
1722
'OpenMLRegressionTask',
1823
'OpenMLClassificationTask',
1924
'OpenMLLearningCurveTask',
25+
'create_task',
2026
'get_task',
2127
'get_tasks',
2228
'list_tasks',

openml/tasks/functions.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import io
33
import re
44
import os
5+
from typing import Union, Optional
56
import xmltodict
67

78
from ..exceptions import OpenMLCacheException
@@ -441,3 +442,69 @@ def _create_task_from_xml(xml):
441442
raise NotImplementedError('Task type %s not supported.' %
442443
common_kwargs['task_type'])
443444
return cls(**common_kwargs)
445+
446+
447+
def create_task(
448+
task_type_id: int,
449+
dataset_id: int,
450+
estimation_procedure_id: int,
451+
target_name: Optional[str] = None,
452+
evaluation_measure: Optional[str] = None,
453+
**kwargs
454+
) -> Union[
455+
OpenMLClassificationTask, OpenMLRegressionTask,
456+
OpenMLLearningCurveTask, OpenMLClusteringTask
457+
]:
458+
"""Create a task based on different given attributes.
459+
460+
Builds a task object with the function arguments as
461+
attributes. The type of the task object built is
462+
determined from the task type id.
463+
More information on how the arguments (task attributes),
464+
relate to the different possible tasks can be found in
465+
the individual task objects at the openml.tasks.task
466+
module.
467+
468+
Parameters
469+
----------
470+
task_type_id : int
471+
Id of the task type.
472+
dataset_id : int
473+
The id of the dataset for the task.
474+
target_name : str, optional
475+
The name of the feature used as a target.
476+
At the moment, only optional for the clustering tasks.
477+
estimation_procedure_id : int
478+
The id of the estimation procedure.
479+
evaluation_measure : str, optional
480+
The name of the evaluation measure.
481+
kwargs : dict, optional
482+
Other task attributes that are not mandatory
483+
for task upload.
484+
485+
Returns
486+
-------
487+
OpenMLClassificationTask, OpenMLRegressionTask,
488+
OpenMLLearningCurveTask, OpenMLClusteringTask
489+
"""
490+
task_cls = {
491+
TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
492+
TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask,
493+
TaskTypeEnum.CLUSTERING: OpenMLClusteringTask,
494+
TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask,
495+
}.get(task_type_id)
496+
497+
if task_cls is None:
498+
raise NotImplementedError(
499+
'Task type {0:d} not supported.'.format(task_type_id)
500+
)
501+
else:
502+
return task_cls(
503+
task_type_id=task_type_id,
504+
task_type=None,
505+
data_set_id=dataset_id,
506+
target_name=target_name,
507+
estimation_procedure_id=estimation_procedure_id,
508+
evaluation_measure=evaluation_measure,
509+
**kwargs
510+
)

0 commit comments

Comments (0)