Update example to numpy

mfeurer · mfeurer · commit fd768180d20e · 2015-09-03T14:32:35.000+02:00
diff --git a/openml/apiconnector.py b/openml/apiconnector.py
@@ -21,7 +21,7 @@
 import xmltodict
 
 from .entities.dataset import OpenMLDataset
-from .entities.task import Task
+from .entities.task import OpenMLTask
 from .entities.split import OpenMLSplit
 from .util import is_string
 
@@ -753,7 +753,7 @@ def _create_task_from_xml(self, xml):
             text = parameter.get("#text", "")
             estimation_parameters[name] = text
 
-        return Task(
+        return OpenMLTask(
             dic["oml:task_id"], dic["oml:task_type"],
             inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
             inputs["source_data"]["oml:data_set"]["oml:target_feature"],
diff --git a/openml/entities/task.py b/openml/entities/task.py
@@ -9,7 +9,7 @@
         import pickle
 
 
-class Task(object):
+class OpenMLTask(object):
     def __init__(self, task_id, task_type, data_set_id, target_feature,
                  estimation_procedure_type, data_splits_url,
                  estimation_parameters, evaluation_measure,cost_matrix, api_connector):
diff --git a/source/api.rst b/source/api.rst
@@ -5,4 +5,10 @@
 APIs
 ****
 
-.. autoclass:: openml.apiconnector.APIConnector
+.. autoclass:: openml.apiconnector.APIConnector
+
+.. autoclass:: openml.entities.dataset.OpenMLDataset
+
+.. autoclass:: openml.entities.task.OpenMLTask
+
+.. autoclass:: openml.entities.split.OpenMLSplit
diff --git a/source/usage.rst b/source/usage.rst
@@ -16,13 +16,11 @@ platform. If you don't have an account yet,
 
     >>> from openml.apiconnector import APIConnector
 
-    >>> username = "Your OpenML username"
-    >>> password = "Your OpenML password"
-    >>> connector = APIConnector(username=username, password=password)
+    >>> apikey = 'Your API key'
+    >>> connector = APIConnector(apikey=apikey)
 
 The :class:`~openml.apiconnector.APIConnector` will create a cache directory
-and authenticate you at the OpenML server. By this you obtain a session key,
-which is valid for one hour.
+and manage all your queries to the OpenML server.
 
 You can also configure the OpenML package, e.g. change the cache directory.
 Information about the configuration is in the
@@ -35,7 +33,7 @@ Working with datasets
 .. code:: python
 
     >>> dataset_id = 31
-    >>> dataset = connector.download_dataset(1)
+    >>> dataset = connector.download_dataset(dataset_id)
 
 Attributes of the dataset are stored as member variables:
 
@@ -50,18 +48,24 @@ Data can be loaded in the following ways:
 
 .. code:: python
 
-    >>> pd, categorical = dataset.get_pandas()
+    >>> X = dataset.get_dataset()
 
-returns the dataset as a pandas.DataFrame and a list of booleans,
-indicating which attributes are categorical. Categorical attributes are
-already encoded as integers.
+returns the dataset as an np.ndarray. In case the data is sparse,
+a scipy.sparse.csr_matrix is returned.
+
+Most of the time, having only the X matrix is not enough. Two very useful
+arguments are `target` and `return_categorical_indicator`. `target` makes
+`get_dataset()` return `X` and `y` separately; `return_categorical_indicator`
+makes `get_dataset()` return a boolean array which indicates which attributes
+are categorical (and should be one-hot encoded).
 
 .. code:: python
 
-    >>> X, y, categorical = dataset.get_pandas()
+    >>> X, y, categorical = dataset.get_dataset(
+    ...     target=dataset.default_target_attribute,
+    ...     return_categorical_indicator=True)
 
-returns the dataset split into X and y, as well as a list indicating which
-attributes are categorical. In case you are working with `scikit-learn
+In case you are working with `scikit-learn
 <http://scikit-learn>`_, you can use this data right away:
 
 .. code:: python
@@ -72,7 +76,7 @@ attributes are categorical. In case you are working with `scikit-learn
         True, False, True, True, False, True, False, True, True, False, True,
         False, True, True], dtype=<type 'float'>, n_values='auto',
         sparse=True)
-    >>> X = enc.transform(X).todense()
+    >>> X = enc.fit_transform(X).todense()
     >>> clf = ensemble.RandomForestClassifier()
     >>> clf.fit(X, y)
     RandomForestClassifier(bootstrap=True, compute_importances=None,
diff --git a/tests/entities/test_task.py b/tests/entities/test_task.py
@@ -13,7 +13,7 @@
 
 from openml.entities.dataset import OpenMLDataset
 from openml.entities.split import OpenMLSplit
-from openml.entities.task import Task
+from openml.entities.task import OpenMLTask
 from openml.apiconnector import APIConnector
 
 
@@ -27,7 +27,7 @@ def setUp(self, api_connector_mock):
 
         api_connector_mock.return_value = None
         self.api_connector = APIConnector()
-        self.task = Task(1, "supervised classification", 1, "class",
+        self.task = OpenMLTask(1, "supervised classification", 1, "class",
                          "crossvalidation wth holdout", None, None, None,
                          None, self.api_connector)
 
@@ -38,7 +38,7 @@ def test_get_dataset(self, api_connector_mock):
         self.assertEqual(api_connector_mock.return_value, retval)
         api_connector_mock.assert_called_with(self.api_connector, 1)
 
-    @mock.patch.object(Task, "get_dataset", autospec=True)
+    @mock.patch.object(OpenMLTask, "get_dataset", autospec=True)
     def test_get_X_and_Y(self, task_mock):
         dataset = mock.create_autospec(OpenMLDataset)
         dataset.get_pandas = lambda target=None: (pd.DataFrame(np.zeros((10, 10))),