Skip to content

Commit 9a00067

Browse files
committed
Merge branch 'feature/AddingFlowFunction' into develop
* feature/AddingFlowFunction: updated the source progress; implemented posting run; added upload flow function
2 parents 36ef910 + 61f54da commit 9a00067

3 files changed

Lines changed: 79 additions & 52 deletions

File tree

openml/apiconnector.py

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import OrderedDict
2+
import hashlib
23
import logging
34
import os
45
import re
@@ -816,6 +817,7 @@ def _create_task_cache_dir(self, task_id):
816817
pass
817818
return task_cache_dir
818819

820+
def _perform_api_call(self, call, data=None, file_dictionary=None, add_authentication=True):
819821
############################################################################
820822
# Runs
821823
def get_runs_list(self, task_id=None, flow_id=None, setup_id=None):
@@ -1025,33 +1027,37 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
10251027
if not url.endswith("/"):
10261028
url += "/"
10271029
url += call
1028-
return self._read_url(url, data=data, file_path=file_path)
1030+
return self._read_url(url, data=data, file_dictionary=file_dictionary)
10291031

1030-
def _read_url(self, url, data=None, file_path=None):
1032+
def _read_url(self, url, data=None, file_dictionary=None):
10311033
if data is None:
10321034
data = {}
10331035
data['api_key'] = self.config.get('FAKE_SECTION', 'apikey')
10341036

1035-
if file_path is not None:
1036-
if os.path.isabs(file_path):
1037-
try:
1038-
decoder = arff.ArffDecoder()
1039-
except:
1040-
raise "The file you provided is not a valid arff file"
1041-
1042-
fileElement={'dataset': open(file_path, 'rb')}
1043-
data['description']= data.get('description')
1044-
data.pop('dataset', None)
1045-
1046-
try:
1047-
response = requests.post(url, data=data, files=fileElement)
1037+
if file_dictionary is not None:
1038+
file_elements = {}
1039+
for key, path in file_dictionary.items():
1040+
if os.path.isabs(path) and os.path.exists(path):
1041+
try:
1042+
if key is 'dataset':
1043+
decoder = arff.ArffDecoder()
1044+
with open(path) as fh:
1045+
decoder.decode(fh, encode_nominal=True)
1046+
except:
1047+
raise ValueError("The file you have provided is not a valid arff file")
1048+
1049+
file_elements[key] = open(path, 'rb')
10481050
except URLError as error:
10491051
print(error)
10501052

1053+
else:
1054+
raise ValueError("File doesn't exist")
1055+
1056+
response = requests.post(url, data=data, files=file_elements)
10511057
return response.status_code, response
1052-
else:
1053-
raise "File doesn't exists"
10541058

1059+
except URLError as error:
1060+
print(error)
10551061
else:
10561062
data = urlencode(data)
10571063
data = data.encode('utf-8')
@@ -1096,38 +1102,41 @@ def _read_url(self, url, data=None, file_path=None):
10961102
def upload_dataset(self, description, file_path=None):
10971103
try:
10981104
data = {'description': description}
1099-
return_code, dataset_xml = self._perform_api_call(
1100-
"/data/", data=data, file_path=file_path)
1105+
if file_path is not None:
1106+
return_code, dataset_xml = self._perform_api_call("/data/",data=data, file_dictionary={'dataset': file_path})
11011107

11021108
except URLError as e:
11031109
# TODO logger.debug
11041110
print(e)
11051111
raise e
11061112
return return_code, dataset_xml
11071113

1108-
def upload_flow(self, description, binary, source):
1114+
def upload_flow(self, description, file_path=None):
11091115
try:
1110-
data = {'description': description, 'binary': binary, 'source': source}
1111-
return_code, dataset_xml = self._perform_api_call(
1112-
"openml.implementation.upload", data=data)
1116+
data = {'description': description}
1117+
return_code, dataset_xml = self._perform_api_call("/flow/", data=data, file_dictionary={'source': file_path})
11131118

11141119
except URLError as e:
11151120
# TODO logger.debug
11161121
print(e)
11171122
raise e
11181123
return return_code, dataset_xml
11191124

1120-
def upload_run(self, description, files):
1121-
try:
1122-
data ={'description': description}
1123-
for key, value in files:
1124-
data[key] = value
1125+
def upload_run(self, files):
1126+
file_dictionary = {}
1127+
if 'predictions' in files:
1128+
try:
1129+
for key, value in files.items():
1130+
file_dictionary[key] = value
11251131

1126-
return_code, dataset_xml = self._perform_api_call("openml.run.upload", data=data)
1132+
return_code, dataset_xml = self._perform_api_call("/run/", file_dictionary=file_dictionary)
1133+
1134+
except URLError as e:
1135+
# TODO logger.debug
1136+
print(e)
1137+
raise e
1138+
return return_code, dataset_xml
1139+
else:
1140+
raise ValueError("prediction files doesn't exist")
11271141

1128-
except URLError as e:
1129-
# TODO logger.debug
1130-
print(e)
1131-
raise e
1132-
return return_code, dataset_xml
11331142

source/progress.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ API call implemented tested properly test
1616
/data/list/tag/{tag}
1717
/data/{data_id} yes yes
1818
/data/delete/
19-
/data/upload/
19+
/data/upload/ yes yes
2020
/data/features/{data_id} yes yes
2121
/data/features/upload/
2222
/data/qualities/{data_id} yes yes
@@ -36,12 +36,12 @@ API call implemented tested properly test
3636
/flow/tag
3737
/flow/untag
3838
/flow/{flow_id}
39-
/flow/
39+
/flow/ yes yes
4040
/flow/exists/{name,ext_version}
4141
/flow/owned
4242
/run/list yes yes
4343
/run/{run_id} yes yes
44-
/run
44+
/run yes yes
4545
/run/tag
4646
/run/untag
4747
/run/evaluate

tests/test_apiconnector.py

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,17 @@
55
import shutil
66
import sys
77

8+
89
if sys.version_info[0] >= 3:
910
from unittest import mock
11+
from urllib.request import urlopen
12+
from urllib.parse import urlencode
13+
from urllib.error import URLError
1014
else:
1115
import mock
16+
from urllib import urlencode, urlopen
17+
from urllib2 import URLError, urlopen
18+
1219

1320
from openml.util import is_string
1421

@@ -39,13 +46,11 @@ def setUp(self):
3946
os.chdir(self.workdir)
4047

4148
self.cached = True
42-
49+
self.connector = APIConnector(cache_directory=self.workdir)
4350
try:
4451
apikey = os.environ['OPENMLAPIKEY']
4552
except:
4653
apikey = None
47-
self.connector = APIConnector(cache_directory=self.workdir,
48-
apikey=apikey)
4954

5055
def tearDown(self):
5156
os.chdir(self.cwd)
@@ -269,7 +274,7 @@ def check_flow(flow):
269274
def test_upload_dataset(self):
270275

271276
dataset = self.connector.download_dataset(3)
272-
filePath = os.path.join(self.connector.dataset_cache_dir, "3", "dataset.arff")
277+
file_path = os.path.join(self.connector.dataset_cache_dir, "3", "dataset.arff")
273278

274279
description = """ <oml:data_set_description xmlns:oml="http://openml.org/openml">
275280
<oml:name>anneal</oml:name>
@@ -281,7 +286,7 @@ def test_upload_dataset(self):
281286
<oml:md5_checksum></oml:md5_checksum>
282287
</oml:data_set_description>
283288
"""
284-
return_code, dataset_xml = self.connector.upload_dataset(description, filePath)
289+
return_code, dataset_xml = self.connector.upload_dataset(description, file_path)
285290
self.assertEqual(return_code, 200)
286291

287292
def test_upload_dataset_with_url(self):
@@ -294,20 +299,33 @@ def test_upload_dataset_with_url(self):
294299
<oml:url>http://expdb.cs.kuleuven.be/expdb/data/uci/nominal/iris.arff</oml:url>
295300
</oml:data_set_description>
296301
"""
297-
return_code, dataset_xml = self.connector.upload_dataset (description)
302+
return_code, dataset_xml = self.connector.upload_dataset(description)
298303
self.assertEqual(return_code, 200)
299304

300305
def test_upload_flow(self):
306+
file_path = os.path.join(self.connector.dataset_cache_dir,"uploadflow.txt")
307+
file = open(file_path, "w")
308+
file.write("Testing upload flow")
309+
file.close()
310+
description = '''<oml:flow xmlns:oml="http://openml.org/openml"><oml:name>Test</oml:name><oml:description>description</oml:description> </oml:flow>'''
311+
return_code, dataset_xml = self.connector.upload_flow(description, file_path)
312+
self.assertEqual(return_code, 200)
301313

302-
description = """ <oml:data_set_description xmlns:oml="http://openml.org/openml">
303-
<oml:name>UploadTestWithURL</oml:name>
304-
<oml:version>1</oml:version>
305-
<oml:description>test</oml:description>
306-
<oml:format>ARFF</oml:format>
307-
<oml:url>http://expdb.cs.kuleuven.be/expdb/data/uci/nominal/iris.arff</oml:url>
308-
</oml:data_set_description>
309-
"""
310-
return_code, dataset_xml = self.connector.upload_dataset (description)
314+
def test_upload_run(self):
315+
file = urlopen("http://www.openml.org/data/download/224/weka_generated_predictions1977525485999711307.arff")
316+
file_text = file.read()
317+
file_path = os.path.join(self.connector.dataset_cache_dir, "weka_generated_predictions1977525485999711307.arff")
318+
with open(file_path, "wb") as prediction_file:
319+
prediction_file.write(file_text)
320+
321+
description_text = '''<oml:run xmlns:oml="http://openml.org/openml"><oml:task_id>59</oml:task_id><oml:flow_id>67</oml:flow_id></oml:run>'''
322+
description_path = os.path.join(self.connector.dataset_cache_dir, "description.xml")
323+
with open(description_path, "w") as description_file:
324+
description_file.write(description_text)
325+
326+
file_dictionary = {'predictions': file_path, 'description': description_path}
327+
328+
return_code, dataset_xml = self.connector.upload_run(file_dictionary)
311329
self.assertEqual(return_code, 200)
312330

313331

0 commit comments

Comments
 (0)