Skip to content

Commit 5262272

Browse files
committed
Merge pull request #87 from amueller/flow_refactoring
Flow refactoring
2 parents 17fe0eb + 05bd462 commit 5262272

11 files changed

Lines changed: 154 additions & 152 deletions

File tree

openml/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@
2020
from . import datasets
2121
from .runs import OpenMLRun
2222
from .tasks import OpenMLTask, OpenMLSplit
23+
from .flows import OpenMLFlow
2324

2425

2526
__version__ = "0.2.1"
2627

2728
__all__ = ['APIConnector', 'OpenMLDataset', 'OpenMLRun', 'OpenMLSplit',
28-
'datasets', 'OpenMLTask']
29+
'datasets', 'OpenMLTask', 'OpenMLFlow']

openml/apiconnector.py

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
import logging
22
import os
33
import sys
4-
#import tempfile
54
import requests
65
import arff
7-
import xmltodict
86

97
if sys.version_info[0] < 3:
108
import ConfigParser as configparser
@@ -235,41 +233,3 @@ def _read_url(self, url, data=None):
235233

236234
response = requests.post(url, data=data)
237235
return response.status_code, response.text
238-
239-
# -> OpenMLFlow
240-
def upload_flow(self, description, flow):
241-
"""
242-
The 'description' is binary data of an XML file according to the XSD Schema (OUTDATED!):
243-
https://github.com/openml/website/blob/master/openml_OS/views/pages/rest_api/xsd/openml.implementation.upload.xsd
244-
245-
(optional) file_path is the absolute path to the file that is the flow (eg. a script)
246-
"""
247-
data = {'description': description, 'source': flow}
248-
return_code, dataset_xml = self._perform_api_call(
249-
"/flow/", data=data)
250-
return return_code, dataset_xml
251-
252-
# -> OpenMLFlow
253-
def check_flow_exists(self, name, version):
254-
"""Retrieves the flow id of the flow uniquely identified by name+version.
255-
256-
Returns flow id if such a flow exists,
257-
returns -1 if flow does not exists,
258-
returns -2 if there was not a well-formed response from the server
259-
http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
260-
"""
261-
# Perhaps returns the -1/-2 business with proper raising of exceptions?
262-
263-
if not (type(name) is str and len(name) > 0):
264-
raise ValueError('Parameter \'name\' should be a non-empty string')
265-
if not (type(version) is str and len(version) > 0):
266-
raise ValueError('Parameter \'version\' should be a non-empty string')
267-
268-
return_code, xml_response = self._perform_api_call(
269-
"/flow/exists/%s/%s" % (name, version))
270-
if return_code != 200:
271-
# fixme raise appropriate error
272-
raise ValueError("api call failed: %s" % xml_response)
273-
xml_dict = xmltodict.parse(xml_response)
274-
flow_id = xml_dict['oml:flow_exists']['oml:id']
275-
return return_code, xml_response, flow_id

openml/datasets/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def download_dataset(api_connector, did):
207207
208208
Returns
209209
-------
210-
dataset : :class:`pyMetaLearn.entities.dataset.Dataset`
210+
dataset : :class:`openml.OpenMLDataset`
211211
The downloaded dataset."""
212212
try:
213213
did = int(did)

openml/entities/__init__.py

Whitespace-only changes.

openml/entities/flow.py

Lines changed: 0 additions & 68 deletions
This file was deleted.

openml/flows/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .flow import OpenMLFlow, check_flow_exists
2+
3+
__all__ = ['OpenMLFlow', 'check_flow_exists']

openml/flows/flow.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from collections import OrderedDict
2+
import xmltodict
3+
import sklearn
4+
5+
6+
class OpenMLFlow(object):
    """An OpenML flow wrapping a scikit-learn style model.

    A flow describes an algorithm/implementation on the OpenML server.
    The wrapped ``model`` must provide ``get_params()`` (all sklearn
    estimators do).

    Parameters
    ----------
    model : object
        The (scikit-learn) model this flow describes.
    id : int, optional
        Server-side flow id, if already known.
    uploader : str, optional
        Id/name of the uploader.
    description : str, optional
        Human-readable description sent to the server.
    creator : str, optional
    contributor : str, optional
    tag : str, optional
    """

    def __init__(self, model, id=None, uploader=None,
                 description='Flow generated by openml_run', creator=None,
                 contributor=None, tag=None):
        self.id = id
        # BUG FIX: attribute was misspelled 'self.upoader', silently
        # dropping the uploader argument.
        self.uploader = uploader
        self.description = description
        self.creator = creator
        # BUG FIX: 'contributor' was accepted but never stored.
        self.contributor = contributor
        self.tag = tag
        self.model = model
        self.source = "FIXME DEFINE PYTHON FLOW"
        # Fully qualified class name of the wrapped model, used as the
        # flow name on the server.
        self.name = (model.__module__ + "." +
                     model.__class__.__name__)
        # External version string; the 'T' prefix plus the sklearn
        # version uniquely identifies the implementation.
        self.external_version = 'Tsklearn_' + sklearn.__version__

    def generate_flow_xml(self):
        """Generate the XML description of this flow for upload.

        Returns
        -------
        str
            XML document (without the encoding declaration, which the
            server rejects) describing the flow and its parameters.
        """
        model = self.model
        flow_dict = OrderedDict()
        flow_dict['oml:flow'] = OrderedDict()
        flow_dict['oml:flow']['@xmlns:oml'] = 'http://openml.org/openml'
        flow_dict['oml:flow']['oml:name'] = self.name
        flow_dict['oml:flow']['oml:external_version'] = self.external_version
        flow_dict['oml:flow']['oml:description'] = self.description

        clf_params = model.get_params()
        flow_parameters = []
        for k, v in clf_params.items():
            # Could also send data_type, default_value, description,
            # recommendedRange. Not using v.__class__.__name__ as the
            # data type because it does not conform to the server's
            # standards (e.g. 'int' instead of 'integer').
            param_dict = {'oml:name': k}
            flow_parameters.append(param_dict)

        flow_dict['oml:flow']['oml:parameter'] = flow_parameters

        flow_xml = xmltodict.unparse(flow_dict, pretty=True)

        # A flow may not be uploaded with the encoding specification,
        # so strip the first line ('<?xml version=...?>').
        flow_xml = flow_xml.split('\n', 1)[-1]
        return flow_xml

    def publish(self, api_connector):
        """Upload this flow to the OpenML server.

        The description is an XML document according to the XSD schema
        (OUTDATED!):
        https://github.com/openml/website/blob/master/openml_OS/views/pages/rest_api/xsd/openml.implementation.upload.xsd

        Returns
        -------
        (int, str)
            HTTP return code and the server's XML response.
        """
        xml_description = self.generate_flow_xml()
        data = {'description': xml_description, 'source': self.source}
        return_code, return_value = api_connector._perform_api_call(
            "/flow/", data=data)
        return return_code, return_value

    def ensure_flow_exists(self, connector):
        """Return the server-side flow id, publishing the flow if needed.

        First checks whether a flow exists for the wrapped model; if it
        does, returns the corresponding flow id, otherwise publishes the
        flow and returns the id of the newly created flow.

        Returns
        -------
        int
            The flow id.
        """
        # Use the version computed once in __init__ instead of
        # re-importing sklearn and rebuilding the same string here.
        _, _, flow_id = check_flow_exists(connector, self.name,
                                          self.external_version)

        # check_flow_exists reports -1 when no such flow is registered.
        if int(flow_id) == -1:
            return_code, response_xml = self.publish(connector)
            response_dict = xmltodict.parse(response_xml)
            flow_id = response_dict['oml:upload_flow']['oml:id']

        return int(flow_id)
80+
81+
def check_flow_exists(api_connector, name, version):
    """Retrieve the flow id of the flow uniquely identified by name+version.

    Returns flow id if such a flow exists,
    returns -1 if flow does not exist.
    http://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version

    Parameters
    ----------
    api_connector : APIConnector
        Connector used to perform the REST call.
    name : str
        Flow name (non-empty).
    version : str
        Flow external version string (non-empty).

    Returns
    -------
    (int, str, str)
        HTTP return code, raw XML response, and the flow id ('-1' if
        the flow does not exist).

    Raises
    ------
    ValueError
        If name/version are not non-empty strings, or the API call fails.
    """
    # isinstance instead of 'type(x) is str': idiomatic and accepts
    # str subclasses.
    if not isinstance(name, str) or len(name) == 0:
        raise ValueError('Parameter \'name\' should be a non-empty string')
    if not isinstance(version, str) or len(version) == 0:
        raise ValueError('Parameter \'version\' should be a non-empty string')

    return_code, xml_response = api_connector._perform_api_call(
        "/flow/exists/%s/%s" % (name, version))
    if return_code != 200:
        # fixme raise appropriate error
        raise ValueError("api call failed: %s" % xml_response)
    xml_dict = xmltodict.parse(xml_response)
    flow_id = xml_dict['oml:flow_exists']['oml:id']
    return return_code, xml_response, flow_id

openml/runs/run.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import os
77

88

9-
from ..entities.flow import OpenMLFlow
9+
from ..flows import OpenMLFlow
1010
from ..exceptions import OpenMLCacheException
1111
from ..util import URLError
1212
from ..tasks import download_task
@@ -16,7 +16,7 @@ class OpenMLRun(object):
1616
def __init__(self, task_id, flow_id, setup_string, dataset_id, files=None,
1717
setup_id=None, tags=None, run_id=None, uploader=None,
1818
uploader_name=None, evaluations=None, data_content=None,
19-
classifier=None, task_type=None, task_evaluation_measure=None,
19+
model=None, task_type=None, task_evaluation_measure=None,
2020
flow_name=None, parameter_settings=None, predictions_url=None):
2121
self.run_id = run_id
2222
self.uploader = uploader
@@ -33,7 +33,7 @@ def __init__(self, task_id, flow_id, setup_string, dataset_id, files=None,
3333
self.predictions_url = predictions_url
3434
self.evaluations = evaluations
3535
self.data_content = data_content
36-
self.classifier = classifier
36+
self.model = model
3737

3838
def generate_arff(self, api_connector):
3939
"""Generates an arff
@@ -81,48 +81,49 @@ def create_description_xml(self):
8181
run_environment = get_version_information()
8282
setup_string = '' # " ".join(sys.argv);
8383

84-
parameter_settings = self.classifier.get_params()
84+
parameter_settings = self.model.get_params()
8585
# as a tag, it must be of the form ([a-zA-Z0-9_\-\.])+
8686
# so we format time from 'mm/dd/yy hh:mm:ss' to 'mm-dd-yy_hh.mm.ss'
8787
well_formatted_time = time.strftime("%c").replace(
8888
' ', '_').replace('/', '-').replace(':', '.')
8989
tags = run_environment + [well_formatted_time] + ['openml_run'] + \
90-
[self.classifier.__module__ + "." + self.classifier.__class__.__name__]
90+
[self.model.__module__ + "." + self.model.__class__.__name__]
9191
description = construct_description_dictionary(
9292
self.task_id, self.flow_id, setup_string, parameter_settings, tags)
9393
description_xml = xmltodict.unparse(description, pretty=True)
9494
return description_xml
9595

9696

97-
def openml_run(connector, task, classifier):
97+
def openml_run(connector, task, model):
9898
"""Performs a CV run on the dataset of the given task, using the split.
9999
100100
Parameters
101101
----------
102102
connector : APIConnector
103103
Openml APIConnector which is used to download the OpenML Task and Dataset
104104
taskid : int
105-
The integer identifier of the task to run the classifier on
106-
classifier : sklearn classifier
107-
a classifier which has a function fit(X,Y) and predict(X),
108-
all supervised estimators of scikit learn follow this definition of a classifier [1]
105+
The integer identifier of the task to run the model on
106+
model : sklearn model
107+
a model which has a function fit(X,Y) and predict(X),
108+
all supervised estimators of scikit learn follow this definition of a model [1]
109109
[1](http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)
110110
111111
112112
Returns
113113
-------
114-
classifier : sklearn classifier
115-
the classifier, trained on the whole dataset
114+
model : sklearn model
115+
the model, trained on the whole dataset
116116
arff-dict : dict
117117
a dictionary with an 'attributes' and 'data' entry for an arff file
118118
"""
119-
flow_id = OpenMLFlow.ensure_flow_exists(task.api_connector, classifier)
119+
flow = OpenMLFlow(model=model)
120+
flow_id = flow.ensure_flow_exists(task.api_connector)
120121
if(flow_id < 0):
121122
print("No flow")
122123
return 0, 2
123124
print(flow_id)
124125

125-
#runname = "t" + str(task.task_id) + "_" + str(classifier)
126+
#runname = "t" + str(task.task_id) + "_" + str(model)
126127
arff_datacontent = []
127128

128129
dataset = task.get_dataset()
@@ -132,7 +133,7 @@ def openml_run(connector, task, classifier):
132133
if class_labels is None:
133134
raise ValueError('The task has no class labels. This method currently '
134135
'only works for tasks with class labels.')
135-
setup_string = create_setup_string(classifier)
136+
setup_string = create_setup_string(model)
136137

137138
run = OpenMLRun(task.task_id, flow_id, setup_string, dataset.id)
138139

@@ -149,9 +150,9 @@ def openml_run(connector, task, classifier):
149150
testY = Y[test_indices]
150151

151152
start_time = time.time()
152-
classifier.fit(trainX, trainY)
153-
ProbaY = classifier.predict_proba(testX)
154-
PredY = classifier.predict(testX)
153+
model.fit(trainX, trainY)
154+
ProbaY = model.predict_proba(testX)
155+
PredY = model.predict(testX)
155156
end_time = time.time()
156157

157158
train_times.append(end_time - start_time)
@@ -166,7 +167,7 @@ def openml_run(connector, task, classifier):
166167
rep_no = rep_no + 1
167168

168169
run.data_content = arff_datacontent
169-
run.classifier = classifier.fit(X, Y)
170+
run.model = model.fit(X, Y)
170171
return run
171172

172173

@@ -213,10 +214,10 @@ def construct_description_dictionary(taskid, flow_id, setup_string,
213214
return description
214215

215216

216-
def create_setup_string(classifier):
217+
def create_setup_string(model):
217218
run_environment = " ".join(get_version_information())
218-
# fixme str(classifier) might contain (...)
219-
return run_environment + " " + str(classifier)
219+
# fixme str(model) might contain (...)
220+
return run_environment + " " + str(model)
220221

221222

222223
# This can possibly be done by a package such as pyxb, but I could not get

openml/tasks/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def download_split(self):
9999
Parameters
100100
----------
101101
task_id : Task
102-
An entity of :class:`pyMetaLearn.entities.task.Task`.
102+
An entity of :class:`openml.OpenMLTask`.
103103
"""
104104
cached_split_file = os.path.join(
105105
_create_task_cache_dir(self.api_connector, self.task_id), "datasplits.arff")

0 commit comments

Comments
 (0)