openml
diff --git a/openml/runs/run.py b/openml/runs/run.py
Lines changed: 1 addition & 1 deletion
diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py
Lines changed: 2 additions & 2 deletions
diff --git a/openml/tasks/task_functions.py b/openml/tasks/functions.py
openml/tasks/task_functions.py renamed to openml/tasks/functions.py
Lines changed: 22 additions & 24 deletions
diff --git a/tests/entities/test_split.py b/tests/entities/test_split.py
Lines changed: 8 additions & 6 deletions
@@ -10,7 +10,7 @@
 from ..exceptions import OpenMLCacheException
 from ..util import URLError
 from ..tasks import get_task
-from ..tasks.task_functions import _create_task_from_xml
+from ..tasks.functions import _create_task_from_xml
 from .._api_calls import _perform_api_call
 
 
 
@@ -1,7 +1,7 @@
 from .task import OpenMLTask
 from .split import OpenMLSplit
-from .task_functions import (get_task, list_tasks, list_tasks_by_type,
-                             list_tasks_by_tag)
+from .functions import (get_task, list_tasks, list_tasks_by_type,
+                        list_tasks_by_tag)
 
 __all__ = ['OpenMLTask', 'get_task', 'list_tasks', 'list_tasks_by_type',
            'list_tasks_by_tag', 'OpenMLSplit']
@@ -23,21 +23,19 @@ def _get_cached_tasks():
         # description
 
         for filename in directory_content:
-            match = re.match(r"(tid)_([0-9]*)\.xml", filename)
-            if match:
-                tid = match.group(2)
-                tid = int(tid)
+            if not re.match(r"[0-9]*", filename):
+                continue
 
-                tasks[tid] = _get_cached_task(tid)
+            tid = int(filename)
+            tasks[tid] = _get_cached_task(tid)
 
     return tasks
 
 
 def _get_cached_task(tid):
     for cache_dir in [config.get_cache_directory(), config.get_private_directory()]:
         task_cache_dir = os.path.join(cache_dir, "tasks")
-        task_file = os.path.join(task_cache_dir,
-                                 "tid_%d.xml" % int(tid))
+        task_file = os.path.join(task_cache_dir, str(tid), "task.xml")
 
         try:
             with open(task_file) as fh:
@@ -50,7 +48,7 @@ def _get_cached_task(tid):
                                "cached" % tid)
 
 
-def get_estimation_procedure_list():
+def _get_estimation_procedure_list():
     """Return a list of all estimation procedures which are on OpenML.
 
     Returns
@@ -65,9 +63,18 @@ def get_estimation_procedure_list():
         "estimationprocedure/list")
     procs_dict = xmltodict.parse(xml_string)
     # Minimalistic check if the XML is useful
-    assert procs_dict['oml:estimationprocedures']['@xmlns:oml'] == \
-        'http://openml.org/openml'
-    assert type(procs_dict['oml:estimationprocedures']['oml:estimationprocedure']) == list
+    if 'oml:estimationprocedures' not in procs_dict:
+        raise ValueError('Error in return XML, does not contain tag '
+                         'oml:estimationprocedures.')
+    elif '@xmlns:oml' not in procs_dict['oml:estimationprocedures']:
+        raise ValueError('Error in return XML, does not contain tag '
+                         '@xmlns:oml as a child of oml:estimationprocedures.')
+    elif procs_dict['oml:estimationprocedures']['@xmlns:oml'] != \
+            'http://openml.org/openml':
+        raise ValueError('Error in return XML, value of '
+                         'oml:estimationprocedures/@xmlns:oml is not '
+                         'http://openml.org/openml, but %s' %
+                         str(procs_dict['oml:estimationprocedures']['@xmlns:oml']))
 
     procs = []
     for proc_ in procs_dict['oml:estimationprocedures']['oml:estimationprocedure']:
@@ -156,7 +163,7 @@ def _list_tasks(api_call):
                          % str(tasks_dict))
     try:
         tasks = []
-        procs = get_estimation_procedure_list()
+        procs = _get_estimation_procedure_list()
         proc_dict = dict((x['id'], x) for x in procs)
         for task_ in tasks_dict['oml:tasks']['oml:task']:
             task = {'tid': int(task_['oml:task_id']),
@@ -217,21 +224,12 @@ def get_task(task_id):
             print(e)
             raise e
 
-        # Cache the xml task file
-        if os.path.exists(xml_file):
-            with open(xml_file) as fh:
-                local_xml = fh.read()
-
-            if task_xml != local_xml:
-                raise ValueError("Task description of task %d cached at %s "
-                                 "has changed." % (task_id, xml_file))
-
-        else:
-            with open(xml_file, "w") as fh:
-                fh.write(task_xml)
+        with open(xml_file, "w") as fh:
+            fh.write(task_xml)
 
         task = _create_task_from_xml(task_xml)
 
+    # TODO extract this to a function
     task.download_split()
     dataset = datasets.get_dataset(task.dataset_id)
 
 
@@ -12,8 +12,8 @@ def setUp(self):
         __file__ = inspect.getfile(OpenMLSplitTest)
         self.directory = os.path.dirname(__file__)
         # This is for dataset
-        self.arff_filename = os.path.join(self.directory, "..",
-                                          "files", "tasks", "datasplits.arff")
+        self.arff_filename = os.path.join(
+            self.directory, "..", "files", "tasks", "1882", "datasplits.arff")
         self.pd_filename = self.arff_filename.replace(".arff", ".pkl")
 
     def tearDown(self):
@@ -54,14 +54,16 @@ def test_from_arff_file(self):
         self.assertIsInstance(split.split[0][0].test, np.ndarray)
         for i in range(10):
             for j in range(10):
-                self.assertEqual((81,), split.split[i][j].train.shape)
-                self.assertEqual((9,), split.split[i][j].test.shape)
+                self.assertGreaterEqual(split.split[i][j].train.shape[0], 808)
+                self.assertGreaterEqual(split.split[i][j].test.shape[0], 89)
+                self.assertEqual(split.split[i][j].train.shape[0] +
+                                 split.split[i][j].test.shape[0], 898)
 
     def test_get_split(self):
         split = OpenMLSplit._from_arff_file(self.arff_filename)
         train_split, test_split = split.get(fold=5, repeat=2)
-        self.assertEqual(train_split.shape, (81,))
-        self.assertEqual(test_split.shape, (9,))
+        self.assertEqual(train_split.shape[0], 808)
+        self.assertEqual(test_split.shape[0], 90)
         self.assertRaisesRegexp(ValueError, "Repeat 10 not known",
                                 split.get, 10, 2)
         self.assertRaisesRegexp(ValueError, "Fold 10 not known",