Skip to content

Commit 1010a22

Browse files
committed
Merge pull request #91 from amueller/rename_private
make functions that should be private private, rename stuff
2 parents cc255f8 + d3688ef commit 1010a22

20 files changed

Lines changed: 119 additions & 142 deletions

doc/api.rst

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,13 @@ Dataset Functions
2929
:toctree: generated/
3030
:template: function.rst
3131

32-
datasets_active
33-
download_dataset_arff
34-
download_dataset_description
35-
download_dataset_features
36-
download_dataset_qualities
37-
download_dataset
38-
download_datasets
39-
get_cached_datasets
40-
get_list_of_cached_datasets
41-
get_dataset_list
42-
get_cached_dataset
43-
get_dataset_list
32+
check_datasets_active
33+
get_dataset_description
34+
get_dataset_features
35+
get_dataset_qualities
36+
get_dataset
37+
get_datasets
38+
list_datasets
4439

4540
Run Functions
4641
--------------
@@ -50,12 +45,8 @@ Run Functions
5045
:toctree: generated/
5146
:template: function.rst
5247

53-
construct_description_dictionary
54-
create_setup_string
55-
get_version_information
56-
openml_run
57-
download_run
58-
get_cached_run
48+
run_task
49+
get_run
5950

6051
Task Functions
6152
--------------
@@ -65,10 +56,8 @@ Task Functions
6556
:toctree: generated/
6657
:template: function.rst
6758

68-
download_task
69-
get_task_list
70-
get_cached_splits
71-
get_cached_split
59+
get_task
60+
list_tasks
7261

7362
Flow Functions
7463
--------------
@@ -78,4 +67,3 @@ Flow Functions
7867
:toctree: generated/
7968
:template: function.rst
8069

81-
check_flow_exists

doc/progress.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ Convenience Functions
6161
=============================================== =========== ====== =============== ========== =====================
6262
Method implemented tested properly tested loads json proper error handling
6363
=============================================== =========== ====== =============== ========== =====================
64-
get_cached_split yes
65-
get_cached_splits yes
66-
get_cached_dataset yes yes
67-
get_cached_datasets yes yes
64+
_get_cached_split yes
65+
_get_cached_splits yes
66+
_get_cached_dataset yes yes
67+
_get_cached_datasets yes yes
6868
get_cached_task yes
6969
get_cached_tasks yes
7070
=============================================== =========== ====== =============== ========== =====================

doc/usage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Working with datasets
3333
.. code:: python
3434
3535
>>> dataset_id = 31
36-
>>> dataset = connector.download_dataset(dataset_id)
36+
>>> dataset = connector.get_dataset(dataset_id)
3737
3838
Attributes of the dataset are stored as member variables:
3939

examples/OpenMLDemo.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@
126126
}
127127
],
128128
"source": [
129-
"datasets = openml.datasets.get_dataset_list(connector)\n",
129+
"datasets = openml.datasets.list_datasets(connector)\n",
130130
"\n",
131131
"data = pd.DataFrame(datasets)\n",
132132
"print(\"First 10 of %s datasets...\" % len(datasets))\n",
@@ -236,7 +236,7 @@
236236
}
237237
],
238238
"source": [
239-
"dataset = openml.datasets.download_dataset(connector, 61)\n",
239+
"dataset = openml.datasets.get_dataset(connector, 61)\n",
240240
"\n",
241241
"print(\"This is dataset '%s', the target feature is called '%s'\" % (dataset.name, dataset.default_target_attribute))\n",
242242
"print(\"URL: %s\" % dataset.url)\n",
@@ -394,7 +394,7 @@
394394
}
395395
],
396396
"source": [
397-
"dataset = openml.datasets.download_dataset(connector, 61)\n",
397+
"dataset = openml.datasets.get_dataset(connector, 61)\n",
398398
"X, y = dataset.get_dataset(target=dataset.default_target_attribute)\n",
399399
"clf = ensemble.RandomForestClassifier()\n",
400400
"clf.fit(X, y)"
@@ -532,7 +532,7 @@
532532
}
533533
],
534534
"source": [
535-
"task_list = openml.tasks.get_task_list(connector)\n",
535+
"task_list = openml.tasks.list_tasks(connector)\n",
536536
"\n",
537537
"tasks = pd.DataFrame(task_list)\n",
538538
"print(\"First 5 of %s tasks:\" % len(tasks))\n",
@@ -563,7 +563,7 @@
563563
}
564564
],
565565
"source": [
566-
"task = openml.tasks.download_task(connector, 10)\n",
566+
"task = openml.tasks.get_task(connector, 10)\n",
567567
"print(task)"
568568
]
569569
},
@@ -598,10 +598,10 @@
598598
}
599599
],
600600
"source": [
601-
"from openml.runs import openml_run\n",
601+
"from openml.runs import run_task\n",
602602
"\n",
603603
"clf = ensemble.RandomForestClassifier()\n",
604-
"run = openml_run(connector, task, clf)\n",
604+
"run = run_task(connector, task, clf)\n",
605605
"print(\"RandomForest has run on the task.\")"
606606
]
607607
},

examples/sklearn/openml_run_example.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from openml.apiconnector import APIConnector
2-
from openml.autorun import openml_run
2+
from openml.autorun import run_task
33
from sklearn import ensemble
44
import xmltodict
55
import os
66
"""
7-
An example of an automated machine learning experiment using openml_run
7+
An example of an automated machine learning experiment using run_task
88
"""
99

1010
key_file_path = "apikey.txt"
@@ -15,9 +15,9 @@
1515

1616
clf = ensemble.RandomForestClassifier()
1717
connector = APIConnector(apikey = key)
18-
task = connector.download_task(task_id)
18+
task = connector.get_task(task_id)
1919

20-
prediction_path, description_path = openml_run(task, clf)
20+
prediction_path, description_path = run_task(task, clf)
2121

2222
prediction_abspath = os.path.abspath(prediction_path)
2323
description_abspath = os.path.abspath(description_path)

openml/datasets/__init__.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
1-
from .functions import (get_list_of_cached_datasets, get_cached_datasets,
2-
get_cached_dataset, get_dataset_list, datasets_active,
3-
download_datasets, download_dataset,
4-
download_dataset_description, download_dataset_arff,
5-
download_dataset_features, download_dataset_qualities)
1+
from .functions import (list_datasets, check_datasets_active,
2+
get_datasets, get_dataset,
3+
get_dataset_description,
4+
get_dataset_features, get_dataset_qualities)
65
from .dataset import OpenMLDataset
76

8-
__all__ = ['datasets_active', 'download_dataset', 'download_datasets',
9-
'download_datasets_arf', 'download_dataset_features',
10-
'download_dataset_qualities', 'get_cached_datasets',
11-
'OpenMLDataset', 'get_list_of_cached_datasets', 'get_dataset_list',
12-
'get_cached_dataset', 'download_dataset_description',
13-
'download_dataset_arff', 'get_dataset_list']
7+
__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
8+
'get_datasets_arf', 'get_dataset_features',
9+
'get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
10+
'get_dataset_description', 'list_datasets']

openml/datasets/functions.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
############################################################################
1616
# Local getters/accessors to the cache directory
1717

18-
def get_list_of_cached_datasets(api_connector):
18+
def _list_cached_datasets(api_connector):
1919
"""Return list with ids of all cached datasets"""
2020
datasets = []
2121

@@ -46,20 +46,20 @@ def get_list_of_cached_datasets(api_connector):
4646
return datasets
4747

4848

49-
def get_cached_datasets(api_connector):
49+
def _get_cached_datasets(api_connector):
5050
"""Searches for all OpenML datasets in the OpenML cache dir.
5151
5252
Return a dictionary which maps dataset ids to dataset objects"""
53-
dataset_list = get_list_of_cached_datasets(api_connector)
53+
dataset_list = _list_cached_datasets(api_connector)
5454
datasets = OrderedDict()
5555

5656
for did in dataset_list:
57-
datasets[did] = get_cached_dataset(api_connector, did)
57+
datasets[did] = _get_cached_dataset(api_connector, did)
5858

5959
return datasets
6060

6161

62-
def get_cached_dataset(api_connector, did):
62+
def _get_cached_dataset(api_connector, did):
6363
# This code is slow...replace it with new API calls
6464
description = _get_cached_dataset_description(api_connector, did)
6565
arff_file = _get_cached_dataset_arff(api_connector, did)
@@ -103,7 +103,7 @@ def _get_cached_dataset_arff(api_connector, did):
103103
"cached" % did)
104104

105105

106-
def get_dataset_list(api_connector):
106+
def list_datasets(api_connector):
107107
"""Return a list of all datasets which are on OpenML.
108108
109109
Returns
@@ -144,7 +144,7 @@ def get_dataset_list(api_connector):
144144
return datasets
145145

146146

147-
def datasets_active(api_connector, dids):
147+
def check_datasets_active(api_connector, dids):
148148
"""Check if the dataset ids provided are active.
149149
150150
Parameters
@@ -158,7 +158,7 @@ def datasets_active(api_connector, dids):
158158
A dictionary with items {did: active}, where active is a boolean. It
159159
is set to True if the dataset is active.
160160
"""
161-
dataset_list = get_dataset_list(api_connector)
161+
dataset_list = list_datasets(api_connector)
162162
dids = sorted(dids)
163163
active = {}
164164

@@ -171,7 +171,7 @@ def datasets_active(api_connector, dids):
171171
dataset_list_idx = idx
172172

173173

174-
def download_datasets(api_connector, dids):
174+
def get_datasets(api_connector, dids):
175175
"""Download datasets.
176176
177177
Parameters
@@ -186,16 +186,16 @@ def download_datasets(api_connector, dids):
186186
187187
Notes
188188
-----
189-
Uses :func:`download_dataset` internally. Please read
189+
Uses :func:`get_dataset` internally. Please read
190190
the documentation of that function.
191191
"""
192192
datasets = []
193193
for did in dids:
194-
datasets.append(download_dataset(api_connector, did))
194+
datasets.append(get_dataset(api_connector, did))
195195
return datasets
196196

197197

198-
def download_dataset(api_connector, did):
198+
def get_dataset(api_connector, did):
199199
"""Download a dataset.
200200
201201
TODO: explain caching!
@@ -215,14 +215,14 @@ def download_dataset(api_connector, did):
215215
raise ValueError("Dataset ID is neither an Integer nor can be "
216216
"cast to an Integer.")
217217

218-
description = download_dataset_description(api_connector, did)
219-
arff_file = download_dataset_arff(api_connector, did, description=description)
218+
description = get_dataset_description(api_connector, did)
219+
arff_file = _get_dataset_arff(api_connector, did, description=description)
220220

221221
dataset = _create_dataset_from_description(description, arff_file)
222222
return dataset
223223

224224

225-
def download_dataset_description(api_connector, did):
225+
def get_dataset_description(api_connector, did):
226226
# TODO implement a cache for this that invalidates itself after some
227227
# time
228228
# This can be saved on disk, but cannot be cached properly, because
@@ -260,7 +260,7 @@ def download_dataset_description(api_connector, did):
260260
return description
261261

262262

263-
def download_dataset_arff(api_connector, did, description=None):
263+
def _get_dataset_arff(api_connector, did, description=None):
264264
did_cache_dir = _create_dataset_cache_dir(api_connector, did)
265265
output_file = os.path.join(did_cache_dir, "dataset.arff")
266266

@@ -274,7 +274,7 @@ def download_dataset_arff(api_connector, did, description=None):
274274
pass
275275

276276
if description is None:
277-
description = download_dataset_description(api_connector, did)
277+
description = get_dataset_description(api_connector, did)
278278
url = description['oml:url']
279279
return_code, arff_string = api_connector._read_url(url)
280280
# TODO: it is inefficient to load the dataset in memory prior to
@@ -286,7 +286,7 @@ def download_dataset_arff(api_connector, did, description=None):
286286
return output_file
287287

288288

289-
def download_dataset_features(api_connector, did):
289+
def get_dataset_features(api_connector, did):
290290
did_cache_dir = _create_dataset_cache_dir(api_connector, did)
291291
features_file = os.path.join(did_cache_dir, "features.xml")
292292

@@ -316,7 +316,7 @@ def download_dataset_features(api_connector, did):
316316
return features
317317

318318

319-
def download_dataset_qualities(api_connector, did):
319+
def get_dataset_qualities(api_connector, did):
320320
# Dataset qualities are subject to change and must be fetched every time
321321
did_cache_dir = _create_dataset_cache_dir(api_connector, did)
322322
qualities_file = os.path.join(did_cache_dir, "qualities.xml")

openml/flows/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .flow import OpenMLFlow, check_flow_exists
1+
from .flow import OpenMLFlow
22

3-
__all__ = ['OpenMLFlow', 'check_flow_exists']
3+
__all__ = ['OpenMLFlow']

openml/flows/flow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
class OpenMLFlow(object):
77
def __init__(self, model, id=None, uploader=None,
8-
description='Flow generated by openml_run', creator=None,
8+
description='Flow generated by run_task', creator=None,
99
contributor=None, tag=None):
1010
self.id = id
1111
self.upoader = uploader
@@ -66,7 +66,7 @@ def ensure_flow_exists(self, connector):
6666
"""
6767
import sklearn
6868
flow_version = 'Tsklearn_' + sklearn.__version__
69-
_, _, flow_id = check_flow_exists(connector, self.name, flow_version)
69+
_, _, flow_id = _check_flow_exists(connector, self.name, flow_version)
7070

7171
if int(flow_id) == -1:
7272
return_code, response_xml = self.publish(connector)
@@ -78,7 +78,7 @@ def ensure_flow_exists(self, connector):
7878
return int(flow_id)
7979

8080

81-
def check_flow_exists(api_connector, name, version):
81+
def _check_flow_exists(api_connector, name, version):
8282
"""Retrieves the flow id of the flow uniquely identified by name+version.
8383
8484
Returns flow id if such a flow exists,

openml/runs/__init__.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
from .run import OpenMLRun
2-
from .run import (construct_description_dictionary, create_setup_string,
3-
get_version_information, openml_run, download_run,
4-
get_cached_run)
2+
from .run import run_task, get_run
53

6-
__all__ = ['OpenMLRun', 'construct_description_dictionary',
7-
'create_setup_string', 'get_version_information', 'openml_run',
8-
'download_run', 'get_cached_run']
4+
__all__ = ['OpenMLRun', 'run_task', 'get_run']

0 commit comments

Comments
 (0)