Skip to content

Commit fd76818

Browse files
committed
Update example to numpy
1 parent aae311f commit fd76818

5 files changed

Lines changed: 31 additions & 21 deletions

File tree

openml/apiconnector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import xmltodict
2222

2323
from .entities.dataset import OpenMLDataset
24-
from .entities.task import Task
24+
from .entities.task import OpenMLTask
2525
from .entities.split import OpenMLSplit
2626
from .util import is_string
2727

@@ -753,7 +753,7 @@ def _create_task_from_xml(self, xml):
753753
text = parameter.get("#text", "")
754754
estimation_parameters[name] = text
755755

756-
return Task(
756+
return OpenMLTask(
757757
dic["oml:task_id"], dic["oml:task_type"],
758758
inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
759759
inputs["source_data"]["oml:data_set"]["oml:target_feature"],

openml/entities/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pickle
1010

1111

12-
class Task(object):
12+
class OpenMLTask(object):
1313
def __init__(self, task_id, task_type, data_set_id, target_feature,
1414
estimation_procedure_type, data_splits_url,
1515
estimation_parameters, evaluation_measure,cost_matrix, api_connector):

source/api.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,10 @@
55
APIs
66
****
77

8-
.. autoclass:: openml.apiconnector.APIConnector
8+
.. autoclass:: openml.apiconnector.APIConnector
9+
10+
.. autoclass:: openml.entities.dataset.OpenMLDataset
11+
12+
.. autoclass:: openml.entities.task.OpenMLTask
13+
14+
.. autoclass:: openml.entities.split.OpenMLSplit

source/usage.rst

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,11 @@ platform. If you don't have an account yet,
1616
1717
>>> from openml.apiconnector import APIConnector
1818
19-
>>> username = "Your OpenML username"
20-
>>> password = "Your OpenML password"
21-
>>> connector = APIConnector(username=username, password=password)
19+
>>> apikey = 'Your API key'
20+
>>> connector = APIConnector(apikey=apikey)
2221
2322
The :class:`~openml.apiconnector.APIConnector` will create a cache directory
24-
and authenticate you at the OpenML server. By this you obtain a session key,
25-
which is valid for one hour.
23+
and manage all your queries to the OpenML server.
2624

2725
You can also configure the OpenML package, e.g. change the cache directory.
2826
Information about the configuration is in the
@@ -35,7 +33,7 @@ Working with datasets
3533
.. code:: python
3634
3735
>>> dataset_id = 31
38-
>>> dataset = connector.download_dataset(1)
36+
>>> dataset = connector.download_dataset(dataset_id)
3937
4038
Attributes of the dataset are stored as member variables:
4139

@@ -50,18 +48,24 @@ Data can be loaded in the following ways:
5048

5149
.. code:: python
5250
53-
>>> pd, categorical = dataset.get_pandas()
51+
>>> X = dataset.get_dataset()
5452
55-
returns the dataset as a pandas.DataFrame and a list of booleans,
56-
indicating which attributes are categorical. Categorical attributes are
57-
already encoded as integers.
53+
returns the dataset as a np.ndarray. In case the data is sparse,
54+
a scipy.sparse.csr_matrix is returned.
55+
56+
Most of the time, having only the X matrix is not enough. Two very useful arguments
57+
are `target` and `return_categorical_indicator`. `target` makes
58+
`get_dataset()` return `X` and `y` separately; `return_categorical_indicator` makes
59+
`get_dataset()` return a boolean array that indicates which attributes are
60+
categorical (and should be one-hot encoded).
5861

5962
.. code:: python
6063
61-
>>> X, y, categorical = dataset.get_pandas()
64+
>>> X, y, categorical = dataset.get_dataset(
65+
target=dataset.default_target_attribute,
66+
return_categorical_indicator=True)
6267
63-
returns the dataset split into X and y, as well as a list indicating which
64-
attributes are categorical. In case you are working with `scikit-learn
68+
In case you are working with `scikit-learn
6569
<http://scikit-learn.org>`_, you can use this data right away:
6670

6771
.. code:: python
@@ -72,7 +76,7 @@ attributes are categorical. In case you are working with `scikit-learn
7276
True, False, True, True, False, True, False, True, True, False, True,
7377
False, True, True], dtype=<type 'float'>, n_values='auto',
7478
sparse=True)
75-
>>> X = enc.transform(X).todense()
79+
>>> X = enc.fit_transform(X).todense()
7680
>>> clf = ensemble.RandomForestClassifier()
7781
>>> clf.fit(X, y)
7882
RandomForestClassifier(bootstrap=True, compute_importances=None,

tests/entities/test_task.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from openml.entities.dataset import OpenMLDataset
1515
from openml.entities.split import OpenMLSplit
16-
from openml.entities.task import Task
16+
from openml.entities.task import OpenMLTask
1717
from openml.apiconnector import APIConnector
1818

1919

@@ -27,7 +27,7 @@ def setUp(self, api_connector_mock):
2727

2828
api_connector_mock.return_value = None
2929
self.api_connector = APIConnector()
30-
self.task = Task(1, "supervised classification", 1, "class",
30+
self.task = OpenMLTask(1, "supervised classification", 1, "class",
3131
"crossvalidation wth holdout", None, None, None,
3232
None, self.api_connector)
3333

@@ -38,7 +38,7 @@ def test_get_dataset(self, api_connector_mock):
3838
self.assertEqual(api_connector_mock.return_value, retval)
3939
api_connector_mock.assert_called_with(self.api_connector, 1)
4040

41-
@mock.patch.object(Task, "get_dataset", autospec=True)
41+
@mock.patch.object(OpenMLTask, "get_dataset", autospec=True)
4242
def test_get_X_and_Y(self, task_mock):
4343
dataset = mock.create_autospec(OpenMLDataset)
4444
dataset.get_pandas = lambda target=None: (pd.DataFrame(np.zeros((10, 10))),

0 commit comments

Comments
 (0)