openml
diff --git a/‎.travis.yml‎
Lines changed: 5 additions & 3 deletions b/‎.travis.yml‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎openml/__init__.py‎
Lines changed: 40 additions & 2 deletions b/‎openml/__init__.py‎
Lines changed: 40 additions & 2 deletions
diff --git a/‎openml/_api_calls.py‎
Lines changed: 17 additions & 2 deletions b/‎openml/_api_calls.py‎
Lines changed: 17 additions & 2 deletions
diff --git a/‎openml/config.py‎
Lines changed: 6 additions & 8 deletions b/‎openml/config.py‎
Lines changed: 6 additions & 8 deletions
diff --git a/‎openml/datasets/dataset.py‎
Lines changed: 11 additions & 14 deletions b/‎openml/datasets/dataset.py‎
Lines changed: 11 additions & 14 deletions
diff --git a/‎openml/datasets/functions.py‎
Lines changed: 22 additions & 5 deletions b/‎openml/datasets/functions.py‎
Lines changed: 22 additions & 5 deletions
diff --git a/‎openml/exceptions.py‎
Lines changed: 2 additions & 2 deletions b/‎openml/exceptions.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎openml/flows/__init__.py‎
Lines changed: 4 additions & 3 deletions b/‎openml/flows/__init__.py‎
Lines changed: 4 additions & 3 deletions
@@ -15,9 +15,11 @@ env:
   - TEST_DIR=/tmp/test_dir/
   - MODULE=openml
   matrix:
-  - DISTRIB="conda" PYTHON_VERSION="2.7" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.21" SKLEARN_VERSION="0.18"
-  - DISTRIB="conda" PYTHON_VERSION="3.4" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18"
-  - DISTRIB="conda" PYTHON_VERSION="3.5" COVERAGE="true" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18"
+  - DISTRIB="conda" PYTHON_VERSION="2.7" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.21" SKLEARN_VERSION="0.18.1"
+  - DISTRIB="conda" PYTHON_VERSION="3.4" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18.1"
+  - DISTRIB="conda" PYTHON_VERSION="3.5" NUMPY_VERSION="1.11" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.23.4" SKLEARN_VERSION="0.18.1"
+  - DISTRIB="conda" PYTHON_VERSION="3.6" COVERAGE="true" NUMPY_VERSION="1.12.1" SCIPY_VERSION="0.19.0" CYTHON_VERSION="0.25.2" SKLEARN_VERSION="0.18.1"
+
 install: source ci_scripts/install.sh
 script: bash ci_scripts/test.sh
 after_success: source ci_scripts/success.sh
@@ -18,15 +18,53 @@
 
 from .datasets import OpenMLDataset, OpenMLDataFeature
 from . import datasets
+from . import tasks
 from . import runs
 from . import flows
+from . import setups
 from .runs import OpenMLRun
 from .tasks import OpenMLTask, OpenMLSplit
 from .flows import OpenMLFlow
 
+__version__ = "0.4.0dev"
+
+
+def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None,
+                   run_ids=None):
+    """
+    Populate a cache for offline and parallel usage of the OpenML connector.
+
+    Parameters
+    ----------
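+    task_ids : iterable, optional
+        Task IDs, each passed to ``tasks.functions.get_task``.
+    dataset_ids : iterable, optional
+        Dataset IDs, each passed to ``datasets.functions.get_dataset``.
+    flow_ids, run_ids : iterable, optional
+        Flow and run IDs, each passed to ``flows.functions.get_flow``
+        and ``runs.functions.get_run`` respectively.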
+
+    Returns
+    -------
+    None
+    """
+    if task_ids is not None:
+        for task_id in task_ids:
+            tasks.functions.get_task(task_id)
+
+    if dataset_ids is not None:
+        for dataset_id in dataset_ids:
+            datasets.functions.get_dataset(dataset_id)
+
+    if flow_ids is not None:
+        for flow_id in flow_ids:
+            flows.functions.get_flow(flow_id)
+
+    if run_ids is not None:
+        for run_id in run_ids:
+            runs.functions.get_run(run_id)
 
-__version__ = "0.2.1"
 
 __all__ = ['OpenMLDataset', 'OpenMLDataFeature', 'OpenMLRun',
            'OpenMLSplit', 'datasets', 'OpenMLTask', 'OpenMLFlow',
-           'config', 'runs', 'flows']
+           'config', 'runs', 'flows', 'tasks', 'setups']
@@ -1,8 +1,9 @@
 import io
 import os
 import requests
-import arff
 import warnings
+
+import arff
 import xmltodict
 
 from . import config
@@ -51,6 +52,18 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
     return _read_url(url, data)
 
 
+def _file_id_to_url(file_id, filename=None):
+    '''
+     Build the download URL for a given file id from the configured server.
+     If filename is given, it is appended as the final path segment.
+    '''
+    openml_url = config.server.split('/api/')
+    url = openml_url[0] + '/data/download/%s' %file_id
+    if filename is not None:
+        url += '/' + filename
+    return url
+
+
 def _read_url_files(url, data=None, file_dictionary=None, file_elements=None):
     """do a post request to url with data, file content of
     file_dictionary and sending file_elements as files"""
@@ -110,7 +123,9 @@ def _parse_server_exception(response):
     try:
         server_exception = xmltodict.parse(response.text)
     except:
-        raise OpenMLServerError(('Status code: %d\n' % response.status_code) + response.text)
+        raise OpenMLServerError(('Unexpected server error. Please '
+                                 'contact the developers!\nStatus code: '
+                                 '%d\n' % response.status_code) + response.text)
 
     code = int(server_exception['oml:error']['oml:code'])
     message = server_exception['oml:error']['oml:message']
 
@@ -1,9 +1,12 @@
 """
 Stores module level information like the API key, cache directory and the server.
 """
-import os
-import sys
 import logging
+import os
+
+from six import StringIO
+from six.moves import configparser
+
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
@@ -15,12 +18,7 @@
 cachedir = ""
 
 
-if sys.version_info[0] < 3:
-    import ConfigParser as configparser
-    from StringIO import StringIO
-else:
-    import configparser
-    from io import StringIO
+
 
 
 def _setup():
 
@@ -9,21 +9,11 @@
 
 import numpy as np
 import scipy.sparse
+from six.moves import cPickle as pickle
 import xmltodict
 
 from .data_feature import OpenMLDataFeature
 from ..exceptions import PyOpenMLError
-
-if sys.version_info[0] >= 3:
-    import pickle
-else:
-    try:
-        import cPickle as pickle
-    except:
-        import pickle
-
-
-from ..util import is_string
 from .._api_calls import _perform_api_call
 
 logger = logging.getLogger(__name__)
@@ -49,7 +39,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
                  row_id_attribute=None, ignore_attribute=None,
                  version_label=None, citation=None, tag=None, visibility=None,
                  original_data_url=None, paper_url=None, update_comment=None,
-                 md5_checksum=None, data_file=None, features=None):
+                 md5_checksum=None, data_file=None, features=None, qualities=None):
         # Attributes received by querying the RESTful API
         self.dataset_id = int(dataset_id) if dataset_id is not None else None
         self.name = name
@@ -84,6 +74,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
         self.md5_cheksum = md5_checksum
         self.data_file = data_file
         self.features = None
+        self.qualities = None
 
         if features is not None:
             self.features = {}
@@ -97,6 +88,12 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
                     raise ValueError('Data features not provided in right order')
                 self.features[feature.index] = feature
 
+        if qualities is not None:
+            self.qualities = {}
+            for idx, xmlquality in enumerate(qualities['oml:quality']):
+                name = xmlquality['oml:name']
+                value = xmlquality['oml:value']
+                self.qualities[name] = value
 
         if data_file is not None:
             if self._data_features_supported():
@@ -219,7 +216,7 @@ def get_data(self, target=None, target_dtype=int, include_row_id=False,
             if not self.row_id_attribute:
                 pass
             else:
-                if is_string(self.row_id_attribute):
+                if isinstance(self.row_id_attribute, six.string_types):
                     to_exclude.append(self.row_id_attribute)
                 else:
                     to_exclude.extend(self.row_id_attribute)
@@ -243,7 +240,7 @@ def get_data(self, target=None, target_dtype=int, include_row_id=False,
         if target is None:
             rval.append(data)
         else:
-            if is_string(target):
+            if isinstance(target, six.string_types):
                 target = [target]
             targets = np.array([True if column in target else False
                                 for column in attribute_names])
 
@@ -1,9 +1,11 @@
+from collections import OrderedDict
 import io
 import os
 import re
 import shutil
-from collections import OrderedDict
+
 import xmltodict
+
 from .dataset import OpenMLDataset
 from ..exceptions import OpenMLCacheException
 from .. import config
@@ -73,7 +75,8 @@ def _get_cached_dataset(dataset_id):
     description = _get_cached_dataset_description(dataset_id)
     arff_file = _get_cached_dataset_arff(dataset_id)
     features = _get_cached_dataset_features(dataset_id)
-    dataset = _create_dataset_from_description(description, features, arff_file)
+    qualities = _get_cached_dataset_qualities(dataset_id)
+    dataset = _create_dataset_from_description(description, features, qualities, arff_file)
 
     return dataset
 
@@ -105,6 +108,19 @@ def _get_cached_dataset_features(dataset_id):
                                    "cached" % dataset_id)
 
 
+def _get_cached_dataset_qualities(dataset_id):
+    cache_dir = config.get_cache_directory()
+    did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
+    qualities_file = os.path.join(did_cache_dir, "qualities.xml")
+    try:
+        with io.open(qualities_file, encoding='utf8') as fh:
+            qualities_xml = fh.read()
+            return xmltodict.parse(qualities_xml)["oml:data_qualities"]
+    except (IOError, OSError):
+        raise OpenMLCacheException("Dataset qualities for dataset id %d not "
+                                   "cached" % dataset_id)
+
+
 def _get_cached_dataset_arff(dataset_id):
     cache_dir = config.get_cache_directory()
     did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
@@ -270,7 +286,7 @@ def get_dataset(dataset_id):
         _remove_dataset_cache_dir(did_cache_dir)
         raise e
 
-    dataset = _create_dataset_from_description(description, features, arff_file)
+    dataset = _create_dataset_from_description(description, features, qualities, arff_file)
     return dataset
 
 
@@ -468,7 +484,7 @@ def _remove_dataset_cache_dir(did_cache_dir):
                              'Please do this manually!' % did_cache_dir)
 
 
-def _create_dataset_from_description(description, features, arff_file):
+def _create_dataset_from_description(description, features, qualities, arff_file):
     """Create a dataset object from a description dict.
 
     Parameters
@@ -508,5 +524,6 @@ def _create_dataset_from_description(description, features, arff_file):
         description.get("oml:update_comment"),
         description.get("oml:md5_checksum"),
         data_file=arff_file,
-        features=features)
+        features=features,
+        qualities=qualities)
     return dataset
@@ -1,13 +1,14 @@
 class PyOpenMLError(Exception):
     def __init__(self, message):
+        self.message = message
         super(PyOpenMLError, self).__init__(message)
 
+
 class OpenMLServerError(PyOpenMLError):
     """class for when something is really wrong on the server
        (result did not parse to dict), contains unparsed error."""
 
     def __init__(self, message):
-        message = "OpenML Server error: " + message
         super(OpenMLServerError, self).__init__(message)
 
 #
@@ -18,7 +19,6 @@ class OpenMLServerException(OpenMLServerError):
     def __init__(self, code, message, additional=None):
         self.code = code
         self.additional = additional
-        message = "OpenML Server exception: " + message
         super(OpenMLServerException, self).__init__(message)
 
 
 
@@ -1,6 +1,7 @@
-from .flow import OpenMLFlow
-from .sklearn_converter import sklearn_to_flow, flow_to_sklearn
-from .functions import get_flow, list_flows, flow_exists
+from .flow import OpenMLFlow, _copy_server_fields
+
+from .sklearn_converter import sklearn_to_flow, flow_to_sklearn, _check_n_jobs
+from .functions import get_flow, list_flows, flow_exists, assert_flows_equal
 
 __all__ = ['OpenMLFlow', 'create_flow_from_model', 'get_flow', 'list_flows',
            'sklearn_to_flow', 'flow_to_sklearn', 'flow_exists']