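MAINT minor changes to code: inline get_flow_dict into _parse_parameters, extract _add_results_to_arfftrace, drop the 100-value cap on nominal trace attributes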

mfeurer · mfeurer · commit a972eeb6ffc7 · 2017-02-01T10:28:44.000+01:00
diff --git a/openml/flows/__init__.py b/openml/flows/__init__.py
@@ -1,6 +1,6 @@
 from .flow import OpenMLFlow
 from .sklearn_converter import sklearn_to_flow, flow_to_sklearn
-from .functions import get_flow, get_flow_dict
+from .functions import get_flow
 
 __all__ = ['OpenMLFlow', 'create_flow_from_model', 'get_flow',
            'sklearn_to_flow', 'flow_to_sklearn']
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
@@ -25,20 +25,4 @@ def get_flow(flow_id):
     if 'sklearn' in flow.external_version:
         flow.model = flow_to_sklearn(flow)
 
-    return flow
-
-
-def get_flow_dict(flow):
-    """Returns a dictionary with keys flow name and values flow id.
-        Parameters
-        ----------
-        flow : OpenMLFlow
-        """
-    if flow.flow_id is None:
-        raise PyOpenMLError(
-            "Can only invoke function 'get_flow_map' on a server downloaded flow. ")
-    flow_map = {flow.name: flow.flow_id}
-    for subflow in flow.components:
-        flow_map.update(get_flow_dict(flow.components[subflow]))
-
-    return flow_map
+    return flow
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -82,17 +82,8 @@ def _run_task_get_arffcontent(model, task, class_labels):
 
             model.fit(trainX, trainY)
             if isinstance(model, BaseSearchCV):
-                for itt_no in range(0, len(model.cv_results_['mean_test_score'])):
-                    # we use the string values for True and False, as it is defined in this way by the OpenML server
-                    selected = 'false'
-                    if itt_no == model.best_index_:
-                       selected = 'true'
-                    test_score = model.cv_results_['mean_test_score'][itt_no]
-                    arff_line = [rep_no, fold_no, itt_no, test_score, selected]
-                    for key in model.cv_results_:
-                        if key.startswith("param_"):
-                            arff_line.append(str(model.cv_results_[key][itt_no]))
-                    arff_tracecontent.append(arff_line)
+                _add_results_to_arfftrace(arff_tracecontent, fold_no, model,
+                                          rep_no)
 
             ProbaY = model.predict_proba(testX)
             PredY = model.predict(testX)
@@ -113,6 +104,20 @@ def _run_task_get_arffcontent(model, task, class_labels):
     return arff_datacontent, arff_tracecontent
 
 
+def _add_results_to_arfftrace(arff_tracecontent, fold_no, model, rep_no):
+    """Append one ARFF trace row per ``mean_test_score`` entry of ``model``.
+
+    Row layout: rep_no, fold_no, iteration, mean test score, selected, params.
+    """
+    param_keys = [key for key in model.cv_results_ if key.startswith("param_")]
+    for itt_no, test_score in enumerate(model.cv_results_['mean_test_score']):
+        # The OpenML server expects the strings 'true'/'false', not booleans.
+        selected = 'true' if itt_no == model.best_index_ else 'false'
+        arff_line = [rep_no, fold_no, itt_no, test_score, selected]
+        arff_line += [str(model.cv_results_[k][itt_no]) for k in param_keys]
+        arff_tracecontent.append(arff_line)
+
+
 def get_runs(run_ids):
     """Gets all runs in run_ids list.
 
diff --git a/openml/runs/run.py b/openml/runs/run.py
@@ -110,8 +110,7 @@ def _generate_trace_arff_dict(self, model):
                     type = 'NUMERIC'
                 else:
                     values = list(set(model.cv_results_[key])) # unique values
-                    if len(values) < 100: # arbitrary number. make it an option?
-                        type = [str(i) for i in values]
+                    type = [str(i) for i in values]
                     print(key + ": " + str(type))
 
                 attribute = ("parameter_" + key[6:], type)
@@ -179,19 +178,26 @@ def _create_description_xml(self):
         return description_xml
 
 def _parse_parameters(model, flow):
-    """Extracts all parameter settings from an model in OpenML format.
+    """Extracts all parameter settings from a model in OpenML format.
 
     Parameters
     ----------
     model
-        the sci-kit learn model (fitted)
+        the scikit-learn model (fitted)
     flow
         openml flow object (containing flow ids, i.e., it has to be downloaded from the server)
 
     """
     python_param_settings = model.get_params()
     openml_param_settings = []
-    flow_dict = openml.flows.get_flow_dict(flow)
+
+    def get_flow_dict(_flow):
+        flow_map = {_flow.name: _flow.flow_id}
+        for subflow in _flow.components:
+            flow_map.update(get_flow_dict(_flow.components[subflow]))
+        return flow_map
+
+    flow_dict = get_flow_dict(flow)
 
     for param in python_param_settings:
         if "__" in param: