remove file util.py

mfeurer · mfeurer · commit 5e65a4706354 · 2017-05-11T15:19:45.000+02:00
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
@@ -23,7 +23,6 @@
         import pickle
 
 
-from ..util import is_string
 from .._api_calls import _perform_api_call
 
 logger = logging.getLogger(__name__)
@@ -219,7 +218,7 @@ def get_data(self, target=None, target_dtype=int, include_row_id=False,
             if not self.row_id_attribute:
                 pass
             else:
-                if is_string(self.row_id_attribute):
+                if isinstance(self.row_id_attribute, six.string_types):
                     to_exclude.append(self.row_id_attribute)
                 else:
                     to_exclude.extend(self.row_id_attribute)
@@ -243,7 +242,7 @@ def get_data(self, target=None, target_dtype=int, include_row_id=False,
         if target is None:
             rval.append(data)
         else:
-            if is_string(target):
+            if isinstance(target, six.string_types):
                 target = [target]
             targets = np.array([True if column in target else False
                                 for column in attribute_names])
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -1,22 +1,21 @@
 from collections import defaultdict
 import io
+import json
 import os
-import xmltodict
-import numpy as np
+import sys
+import time
 import warnings
+
+import numpy as np
 import sklearn
-import time
 import six
-import json
+import xmltodict
 
 from ..exceptions import PyOpenMLError
 from .. import config
-
 from ..flows import sklearn_to_flow, get_flow, flow_exists, _check_n_jobs
 from ..setups import setup_exists, initialize_model
-
 from ..exceptions import OpenMLCacheException, OpenMLServerException
-from ..util import URLError, version_complies
 from .._api_calls import _perform_api_call, _file_id_to_url
 from .run import OpenMLRun, _get_version_information
 from .trace import OpenMLRunTrace, OpenMLTraceIteration
@@ -26,7 +25,6 @@
 # circular imports
 
 
-
 def run_task(task, model, avoid_duplicate_runs=True, flow_tags=None, seed=None):
     """Performs a CV run on the dataset of the given task, using the split.
 
@@ -296,7 +294,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
     user_defined_measures = defaultdict(lambda: defaultdict(dict))
 
     rep_no = 0
-    can_measure_runtime = version_complies(3, 3) and _check_n_jobs(model)
+    can_measure_runtime = sys.version_info[:2] >= (3, 3) and _check_n_jobs(model)
     # TODO use different iterator to only provide a single iterator (less
     # methods, less maintenance, less confusion)
     for rep in task.iterate_repeats():
@@ -447,14 +445,9 @@ def get_run(run_id):
 
     try:
         return _get_cached_run(run_id)
-    except (OpenMLCacheException):
-        try:
-            run_xml = _perform_api_call("run/%d" % run_id)
-        except (URLError, UnicodeEncodeError) as e:
-            # TODO logger.debug
-            print(e)
-            raise e
 
+    except (OpenMLCacheException):
+        run_xml = _perform_api_call("run/%d" % run_id)
         with io.open(run_file, "w", encoding='utf8') as fh:
             fh.write(run_xml)
 
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -4,7 +4,6 @@
 from collections import OrderedDict
 import xmltodict
 
-from ..util import URLError
 from ..exceptions import OpenMLCacheException
 from .. import datasets
 from .task import OpenMLTask, _create_task_cache_dir
@@ -103,12 +102,11 @@ def list_tasks(task_type_id=None, offset=None, size=None, tag=None):
 
     Returns
     -------
-    list
-        A list of all tasks having the given task_type_id and the give tag.
-        Every task is represented by a dictionary containing the following
-        information: task id, dataset id, task_type and status. If qualities
-        are calculated for the associated dataset, some of these are also
-        returned.
+    dict
+        All tasks having the given task_type_id and the given tag. Every task
+        is represented by a dictionary containing the following information:
+        task id, dataset id, task_type and status. If qualities are calculated
+        for the associated dataset, some of these are also returned.
     """
     api_call = "task/list"
     if task_type_id is not None:
@@ -146,7 +144,7 @@ def _list_tasks(api_call):
                          % str(tasks_dict))
 
     try:
-        tasks = dict();
+        tasks = dict()
         procs = _get_estimation_procedure_list()
         proc_dict = dict((x['id'], x) for x in procs)
         for task_ in tasks_dict['oml:tasks']['oml:task']:
@@ -199,13 +197,9 @@ def get_task(task_id):
     try:
         with io.open(xml_file, encoding='utf8') as fh:
             task = _create_task_from_xml(fh.read())
-    except (OSError, IOError):
 
-        try:
-            task_xml = _perform_api_call("task/%d" % task_id)
-        except (URLError, UnicodeEncodeError) as e:
-            print(e)
-            raise e
+    except (OSError, IOError):
+        task_xml = _perform_api_call("task/%d" % task_id)
 
         with io.open(xml_file, "w", encoding='utf8') as fh:
             fh.write(task_xml)
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
@@ -3,7 +3,6 @@
 
 from .. import config
 from .. import datasets
-from ..util import URLError
 from .split import OpenMLSplit
 from .._api_calls import _read_url
 
@@ -70,11 +69,7 @@ def _download_split(self, cache_file):
                 pass
         except (OSError, IOError):
             split_url = self.estimation_procedure["data_splits_url"]
-            try:
-                split_arff = _read_url(split_url)
-            except (URLError, UnicodeEncodeError) as e:
-                print(e, split_url)
-                raise e
+            split_arff = _read_url(split_url)
 
             with io.open(cache_file, "w", encoding='utf8') as fh:
                 fh.write(split_arff)
diff --git a/openml/util.py b/openml/util.py
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
@@ -3,9 +3,9 @@
 import os
 
 import numpy as np
+import six
 
 from openml import OpenMLDataset
-from openml.util import is_string
 
 
 class OpenMLDatasetTest(unittest.TestCase):
@@ -64,7 +64,8 @@ def test_get_data(self):
         rval, attribute_names = self.dataset.get_data(
             return_attribute_names=True)
         self.assertEqual(len(attribute_names), 39)
-        self.assertTrue(all([is_string(att) for att in attribute_names]))
+        self.assertTrue(all([isinstance(att, six.string_types)
+                             for att in attribute_names]))
 
     def test_get_sparse_dataset(self):
         rval = self.sparse_dataset.get_data()
@@ -80,7 +81,8 @@ def test_get_sparse_dataset(self):
             return_attribute_names=True)
         self.assertIsInstance(rval, np.ndarray)
         self.assertEqual(len(attribute_names), 20001)
-        self.assertTrue(all([is_string(att) for att in attribute_names]))
+        self.assertTrue(all([isinstance(att, six.string_types)
+                             for att in attribute_names]))
 
     def test_get_data_with_target(self):
         X, y = self.dataset.get_data(target="class")
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
@@ -8,12 +8,12 @@
 else:
     import mock
 
+import six
 import scipy.sparse
 
 import openml
 from openml import OpenMLDataset
 from openml.exceptions import OpenMLCacheException, PyOpenMLError
-from openml.util import is_string
 from openml.testing import TestBase
 
 from openml.datasets.functions import (_get_cached_dataset,
@@ -98,7 +98,7 @@ def _check_dataset(self, dataset):
             self.assertIn('did', dataset)
             self.assertIsInstance(dataset['did'], int)
             self.assertIn('status', dataset)
-            self.assertTrue(is_string(dataset['status']))
+            self.assertIsInstance(dataset['status'], six.string_types)
             self.assertIn(dataset['status'], ['in_preparation', 'active',
                                               'deactivated'])
 
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
@@ -2,21 +2,22 @@
 import copy
 import unittest
 
+import six
+
 import openml
-from openml.util import is_string
 
 
 class TestFlowFunctions(unittest.TestCase):
     def _check_flow(self, flow):
         self.assertEqual(type(flow), dict)
         self.assertEqual(len(flow), 6)
         self.assertIsInstance(flow['id'], int)
-        self.assertTrue(is_string(flow['name']))
-        self.assertTrue(is_string(flow['full_name']))
-        self.assertTrue(is_string(flow['version']))
+        self.assertIsInstance(flow['name'], six.string_types)
+        self.assertIsInstance(flow['full_name'], six.string_types)
+        self.assertIsInstance(flow['version'], six.string_types)
         # There are some runs on openml.org that can have an empty external
         # version
-        self.assertTrue(is_string(flow['external_version']) or
+        self.assertTrue(isinstance(flow['external_version'], six.string_types) or
                         flow['external_version'] is None)
 
     def test_list_flows(self):
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
@@ -1,12 +1,13 @@
 import os
 import sys
 
+import six
+
 if sys.version_info[0] >= 3:
     from unittest import mock
 else:
     import mock
 
-from openml.util import is_string
 from openml.testing import TestBase
 from openml import OpenMLSplit, OpenMLTask
 from openml.exceptions import OpenMLCacheException
@@ -45,7 +46,7 @@ def _check_task(self, task):
         self.assertIn('did', task)
         self.assertIsInstance(task['did'], int)
         self.assertIn('status', task)
-        self.assertTrue(is_string(task['status']))
+        self.assertIsInstance(task['status'], six.string_types)
         self.assertIn(task['status'],
                       ['in_preparation', 'active', 'deactivated'])