Skip to content

Commit 59a487a

Browse files
committed
more documentation, make more functions private, add server errors when we don't get back 200.
1 parent 3632127 commit 59a487a

8 files changed

Lines changed: 100 additions & 97 deletions

File tree

doc/progress.rst

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,3 @@ API call implemented tested properly test
6666
=============================================== =========== ====== =============== ========== =====================
6767

6868
We do not plan to implement API calls marked with an **X**!
69-
70-
Convenience Functions
71-
=====================
72-
73-
=============================================== =========== ====== =============== ========== =====================
74-
Method implemented tested properly tested loads json proper error handling
75-
=============================================== =========== ====== =============== ========== =====================
76-
_get_cached_split yes
77-
_get_cached_splits yes
78-
_get_cached_dataset yes yes
79-
_get_cached_datasets yes yes
80-
get_cached_task yes
81-
get_cached_tasks yes
82-
=============================================== =========== ====== =============== ========== =====================

openml/datasets/dataset.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import pickle
1717

1818
from ..util import is_string
19+
from ..exceptions import OpenMLServerError
1920
from .._api_calls import _perform_api_call
2021

2122
logger = logging.getLogger(__name__)
@@ -271,6 +272,8 @@ def publish(self):
271272
"/data/", data=data, file_dictionary={'dataset': self.data_file})
272273
else:
273274
return_code, return_value = _perform_api_call("/data/", data=data)
275+
if return_code != 200:
276+
raise OpenMLServerError(return_value)
274277
return return_code, return_value
275278

276279
def _to_xml(self):

openml/exceptions.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@ def __init__(self, message):
1414

1515

1616
class OpenMLServerError(PyOpenMLError):
17+
"""Server didn't respond 200."""
1718
def __init__(self, message):
19+
message = "OpenML Server error: " + message
1820
super(OpenMLServerError, self).__init__(message)
1921

2022

2123
class OpenMLCacheException(PyOpenMLError):
24+
"""Dataset / task etc not found in cache"""
2225
def __init__(self, message):
23-
super(OpenMLCacheException, self).__init__(message)
26+
super(OpenMLCacheException, self).__init__(message)

openml/flows/flow.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import sklearn
44

55
from .._api_calls import _perform_api_call
6+
from ..exceptions import OpenMLServerError
67

78

89
class OpenMLFlow(object):
@@ -85,6 +86,8 @@ def publish(self):
8586
data = {'description': xml_description, 'source': self.source}
8687
return_code, return_value = _perform_api_call(
8788
"/flow/", data=data)
89+
if return_code != 200:
90+
raise OpenMLServerError(return_value)
8891
return return_code, return_value
8992

9093
def _ensure_flow_exists(self):

openml/runs/run.py

Lines changed: 87 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,17 @@
1212
from ..tasks import get_task
1313
from ..tasks.task_functions import _create_task_from_xml
1414
from .._api_calls import _perform_api_call
15+
from ..exceptions import OpenMLServerError
1516

1617

1718
class OpenMLRun(object):
19+
"""OpenML Run: result of running a model on an openml dataset.
20+
21+
Parameters
22+
----------
23+
FIXME
24+
25+
"""
1826
def __init__(self, task_id, flow_id, setup_string, dataset_id, files=None,
1927
setup_id=None, tags=None, run_id=None, uploader=None,
2028
uploader_name=None, evaluations=None,
@@ -39,21 +47,14 @@ def __init__(self, task_id, flow_id, setup_string, dataset_id, files=None,
3947
self.data_content = data_content
4048
self.model = model
4149

42-
def generate_arff(self):
43-
"""Generates an arff
44-
45-
Parameters
46-
----------
47-
arff_datacontent : list
48-
a list of lists containing, in order:
49-
- repeat (int)
50-
- fold (int)
51-
- test index (int)
52-
- predictions per task label (float)
53-
- predicted class label (string)
54-
- actual class label (string)
55-
task : Task
56-
the OpenML task for which the run is done
50+
def _generate_arff(self):
51+
"""Generates an arff for upload to server.
52+
53+
Returns
54+
-------
55+
arff_dict : dictionary
56+
Dictionary representation of an ARFF data format containing
57+
predictions and confidences.
5758
"""
5859
run_environment = (_get_version_information() +
5960
[time.strftime("%c")] + ['Created by run_task()'])
@@ -73,15 +74,28 @@ def generate_arff(self):
7374
return arff_dict
7475

7576
def publish(self):
76-
predictions = arff.dumps(self.generate_arff())
77-
description_xml = self.create_description_xml()
77+
"""Publish a run to the OpenML server.
78+
79+
Uploads the results of a run to OpenML.
80+
"""
81+
predictions = arff.dumps(self._generate_arff())
82+
description_xml = self._create_description_xml()
7883
data = {'predictions': predictions, 'description':
7984
description_xml}
8085
return_code, return_value = _perform_api_call(
8186
"/run/", file_elements=data)
87+
if return_code != 200:
88+
raise OpenMLServerError(return_value)
8289
return return_code, return_value
8390

84-
def create_description_xml(self):
91+
def _create_description_xml(self):
92+
"""Create xml representation of run for upload.
93+
94+
Returns
95+
-------
96+
xml_string : string
97+
XML description of run.
98+
"""
8599
run_environment = _get_version_information()
86100
setup_string = '' # " ".join(sys.argv);
87101

@@ -103,8 +117,8 @@ def run_task(task, model):
103117
104118
Parameters
105119
----------
106-
taskid : int
107-
The integer identifier of the task to run the model on
120+
task : OpenMLTask
121+
Task to perform.
108122
model : sklearn model
109123
a model which has a function fit(X,Y) and predict(X),
110124
all supervised estimators of scikit learn follow this definition of a model [1]
@@ -113,10 +127,8 @@ def run_task(task, model):
113127
114128
Returns
115129
-------
116-
model : sklearn model
117-
the model, trained on the whole dataset
118-
arff-dict : dict
119-
a dictionary with an 'attributes' and 'data' entry for an arff file
130+
run : OpenMLRun
131+
Result of the run.
120132
"""
121133
flow = OpenMLFlow(model=model)
122134
flow_id = flow._ensure_flow_exists()
@@ -215,6 +227,7 @@ def _to_dict(taskid, flow_id, setup_string, parameter_settings, tags):
215227

216228

217229
def _create_setup_string(model):
230+
"""Create a string representing the model"""
218231
run_environment = " ".join(_get_version_information())
219232
# fixme str(model) might contain (...)
220233
return run_environment + " " + str(model)
@@ -244,13 +257,36 @@ def _get_version_information():
244257

245258

246259
def get_runs(run_ids):
260+
"""Gets all runs in run_ids list.
261+
262+
Parameters
263+
----------
264+
run_ids : list of ints
265+
266+
Returns
267+
-------
268+
runs : list of OpenMLRun
269+
List of runs corresponding to IDs, fetched from the server.
270+
"""
271+
247272
runs = []
248273
for run_id in run_ids:
249274
runs.append(get_run(run_id))
250275
return runs
251276

252277

253278
def get_run(run_id):
279+
"""Gets run corresponding to run_id.
280+
281+
Parameters
282+
----------
283+
run_id : int
284+
285+
Returns
286+
-------
287+
run : OpenMLRun
288+
Run corresponding to ID, fetched from the server.
289+
"""
254290
run_file = os.path.join(config.get_cache_directory(), "runs", "run_%d.xml" % run_id)
255291

256292
try:
@@ -280,6 +316,18 @@ def get_run(run_id):
280316

281317

282318
def _create_run_from_xml(xml):
319+
"""Create a run object from xml returned from server.
320+
321+
Parameters
322+
----------
323+
xml : string
324+
XML describing a run.
325+
326+
Returns
327+
-------
328+
run : OpenMLRun
329+
New run object representing xml.
330+
"""
283331
run = xmltodict.parse(xml)["oml:run"]
284332
run_id = int(run['oml:run_id'])
285333
uploader = int(run['oml:uploader'])
@@ -348,6 +396,7 @@ def _create_run_from_xml(xml):
348396

349397

350398
def _get_cached_run(run_id):
399+
"""Load a run from the cache."""
351400
for cache_dir in [config.get_cache_directory(), config.get_private_directory()]:
352401
run_cache_dir = os.path.join(cache_dir, "runs")
353402
try:
@@ -371,7 +420,7 @@ def list_runs_by_filters(id=None, task=None, flow=None,
371420
Perform API call `/run/list/{filters} <http://www.openml.org/api_docs/#!/run/get_run_list_filters>`_
372421
373422
Parameters
374-
==========
423+
----------
375424
id : int or list
376425
377426
task : int or list
@@ -381,7 +430,7 @@ def list_runs_by_filters(id=None, task=None, flow=None,
381430
uploader : int or list
382431
383432
Returns
384-
=======
433+
-------
385434
list
386435
List of found runs.
387436
"""
@@ -430,11 +479,11 @@ def list_runs_by_tag(tag):
430479
Perform API call `/run/list/tag/{tag} <http://www.openml.org/api_docs/#!/run/get_run_list_tag_tag>`_
431480
432481
Parameters
433-
==========
482+
----------
434483
tag : str
435484
436485
Returns
437-
=======
486+
-------
438487
list
439488
List of found runs.
440489
"""
@@ -447,11 +496,11 @@ def list_runs(run_ids):
447496
Perform API call `/run/list/run/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_run_ids>`_
448497
449498
Parameters
450-
==========
499+
----------
451500
run_ids : int or list
452501
453502
Returns
454-
=======
503+
-------
455504
list
456505
List of found runs.
457506
"""
@@ -464,11 +513,11 @@ def list_runs_by_task(task_id):
464513
Perform API call `/run/list/task/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_task_ids>`_
465514
466515
Parameters
467-
==========
516+
----------
468517
task_id : int or list
469518
470519
Returns
471-
=======
520+
-------
472521
list
473522
List of found runs.
474523
"""
@@ -481,11 +530,11 @@ def list_runs_by_flow(flow_id):
481530
Perform API call `/run/list/flow/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_flow_ids>`_
482531
483532
Parameters
484-
==========
533+
----------
485534
flow_id : int or list
486535
487536
Returns
488-
=======
537+
-------
489538
list
490539
List of found runs.
491540
"""
@@ -498,11 +547,11 @@ def list_runs_by_uploader(uploader_id):
498547
Perform API call `/run/list/uploader/{ids} <http://www.openml.org/api_docs/#!/run/get_run_list_uploader_ids>`_
499548
500549
Parameters
501-
==========
550+
----------
502551
uploader_id : int or list
503552
504553
Returns
505-
=======
554+
-------
506555
list
507556
List of found runs.
508557
"""
@@ -524,13 +573,13 @@ def _list_runs_by(id_, by):
524573
name follows the convention run/list/{by}/{id}
525574
526575
Parameters
527-
==========
576+
----------
528577
id_ : int or list
529578
530579
by : str
531580
532581
Returns
533-
=======
582+
-------
534583
list
535584
List of found runs.
536585

openml/tasks/split.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def __eq__(self, other):
6060
return True
6161

6262
@classmethod
63-
def from_arff_file(cls, filename, cache=True):
63+
def _from_arff_file(cls, filename, cache=True):
6464
repetitions = None
6565
pkl_filename = filename.replace(".arff", ".pkl")
6666
if cache:

openml/tasks/split_functions.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

0 commit comments

Comments
 (0)