Skip to content

Commit 08c9cf6

Browse files
authored
Merge pull request #356 from amueller/task_tagging
tagging for other stuff
2 parents 9787d3c + cfa5f2a commit 08c9cf6

10 files changed

Lines changed: 160 additions & 41 deletions

File tree

openml/_api_calls.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ def _parse_server_exception(response):
139139
additional = None
140140
if 'oml:additional_information' in server_exception['oml:error']:
141141
additional = server_exception['oml:error']['oml:additional_information']
142-
if code in [370, 372]:
142+
if code in [370, 372, 512, 500, 482]:
143+
# 512 for runs, 370 for datasets (should be 372), 500 for flows
144+
# 482 for tasks
143145
return OpenMLServerNoResult(code, message, additional)
144146
return OpenMLServerException(code, message, additional)

openml/flows/flow.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,28 @@ def publish(self):
355355
(flow_id, message))
356356
return self
357357

358+
def push_tag(self, tag):
359+
"""Annotates this flow with a tag on the server.
360+
361+
Parameters
362+
----------
363+
tag : str
364+
Tag to attach to the flow.
365+
"""
366+
data = {'flow_id': self.flow_id, 'tag': tag}
367+
_perform_api_call("/flow/tag", data=data)
368+
369+
def remove_tag(self, tag):
370+
"""Removes a tag from this flow on the server.
371+
372+
Parameters
373+
----------
374+
tag : str
375+
Tag to remove from the flow.
376+
"""
377+
data = {'flow_id': self.flow_id, 'tag': tag}
378+
_perform_api_call("/flow/untag", data=data)
379+
358380

359381
def _copy_server_fields(source_flow, target_flow):
360382
fields_added_by_the_server = ['flow_id', 'uploader', 'version',
@@ -370,5 +392,3 @@ def _copy_server_fields(source_flow, target_flow):
370392
def _add_if_nonempty(dic, key, value):
371393
if value is not None:
372394
dic[key] = value
373-
374-

openml/flows/functions.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import six
55

66
from openml._api_calls import _perform_api_call
7+
from openml.exceptions import OpenMLServerNoResult
78
from . import OpenMLFlow
89

910

@@ -70,7 +71,9 @@ def list_flows(offset=None, size=None, tag=None):
7071

7172

7273
def flow_exists(name, external_version):
73-
"""Retrieves the flow id of the flow uniquely identified by name + external_version.
74+
"""Retrieves the flow id.
75+
76+
A flow is uniquely identified by name + external_version.
7477
7578
Parameters
7679
----------
@@ -93,8 +96,9 @@ def flow_exists(name, external_version):
9396
if not (isinstance(name, six.string_types) and len(external_version) > 0):
9497
raise ValueError('Argument \'version\' should be a non-empty string')
9598

96-
xml_response = _perform_api_call("flow/exists",
97-
data={'name': name, 'external_version': external_version})
99+
xml_response = _perform_api_call(
100+
"flow/exists", data={'name': name, 'external_version':
101+
external_version})
98102

99103
result_dict = xmltodict.parse(xml_response)
100104
flow_id = int(result_dict['oml:flow_exists']['oml:id'])
@@ -105,15 +109,17 @@ def flow_exists(name, external_version):
105109

106110

107111
def _list_flows(api_call):
108-
# TODO add proper error handling here!
109-
xml_string = _perform_api_call(api_call)
112+
try:
113+
xml_string = _perform_api_call(api_call)
114+
except OpenMLServerNoResult:
115+
return []
110116
flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))
111117

112118
# Minimalistic check if the XML is useful
113119
assert type(flows_dict['oml:flows']['oml:flow']) == list, \
114120
type(flows_dict['oml:flows'])
115121
assert flows_dict['oml:flows']['@xmlns:oml'] == \
116-
'http://openml.org/openml', flows_dict['oml:flows']['@xmlns:oml']
122+
'http://openml.org/openml', flows_dict['oml:flows']['@xmlns:oml']
117123

118124
flows = dict()
119125
for flow_ in flows_dict['oml:flows']['oml:flow']:
@@ -190,10 +196,10 @@ def assert_flows_equal(flow1, flow2,
190196
attr2 = getattr(flow2, key, None)
191197
if key == 'components':
192198
for name in set(attr1.keys()).union(attr2.keys()):
193-
if not name in attr1:
199+
if name not in attr1:
194200
raise ValueError('Component %s only available in '
195201
'argument2, but not in argument1.' % name)
196-
if not name in attr2:
202+
if name not in attr2:
197203
raise ValueError('Component %s only available in '
198204
'argument2, but not in argument1.' % name)
199205
assert_flows_equal(attr1[name], attr2[name],

openml/runs/functions.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import openml
1616
import openml.utils
17-
from ..exceptions import PyOpenMLError
17+
from ..exceptions import PyOpenMLError, OpenMLServerNoResult
1818
from .. import config
1919
from ..flows import sklearn_to_flow, get_flow, flow_exists, _check_n_jobs, \
2020
_copy_server_fields
@@ -862,8 +862,10 @@ def list_runs(offset=None, size=None, id=None, task=None, setup=None,
862862

863863
def _list_runs(api_call):
864864
"""Helper function to parse API calls which are lists of runs"""
865-
866-
xml_string = _perform_api_call(api_call)
865+
try:
866+
xml_string = _perform_api_call(api_call)
867+
except OpenMLServerNoResult:
868+
return []
867869

868870
runs_dict = xmltodict.parse(xml_string, force_list=('oml:run',))
869871
# Minimalistic check if the XML is useful

openml/runs/run.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from collections import OrderedDict, defaultdict
1+
from collections import OrderedDict
22
import json
33
import sys
44
import time
@@ -12,6 +12,7 @@
1212
from .._api_calls import _perform_api_call, _file_id_to_url, _read_url_files
1313
from ..exceptions import PyOpenMLError
1414

15+
1516
class OpenMLRun(object):
1617
"""OpenML Run: result of running a model on an openml dataset.
1718
@@ -349,6 +350,28 @@ def extract_parameters(_flow, _flow_dict, component_model,
349350

350351
return parameters
351352

353+
def push_tag(self, tag):
354+
"""Annotates this run with a tag on the server.
355+
356+
Parameters
357+
----------
358+
tag : str
359+
Tag to attach to the run.
360+
"""
361+
data = {'run_id': self.run_id, 'tag': tag}
362+
_perform_api_call("/run/tag", data=data)
363+
364+
def remove_tag(self, tag):
365+
"""Removes a tag from this run on the server.
366+
367+
Parameters
368+
----------
369+
tag : str
370+
Tag to remove from the run.
371+
"""
372+
data = {'run_id': self.run_id, 'tag': tag}
373+
_perform_api_call("/run/untag", data=data)
374+
352375

353376
################################################################################
354377
# Functions which cannot be in runs/functions due to circular imports

openml/tasks/functions.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from oslo_concurrency import lockutils
88
import xmltodict
99

10-
from ..exceptions import OpenMLCacheException
10+
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
1111
from ..datasets import get_dataset
1212
from .task import OpenMLTask, _create_task_cache_dir
1313
from .. import config
@@ -55,9 +55,9 @@ def _get_estimation_procedure_list():
5555
Returns
5656
-------
5757
procedures : list
58-
A list of all estimation procedures. Every procedure is represented by a
59-
dictionary containing the following information: id,
60-
task type id, name, type, repeats, folds, stratified.
58+
A list of all estimation procedures. Every procedure is represented by
59+
a dictionary containing the following information: id, task type id,
60+
name, type, repeats, folds, stratified.
6161
"""
6262

6363
xml_string = _perform_api_call("estimationprocedure/list")
@@ -138,7 +138,10 @@ def list_tasks(task_type_id=None, offset=None, size=None, tag=None):
138138

139139

140140
def _list_tasks(api_call):
141-
xml_string = _perform_api_call(api_call)
141+
try:
142+
xml_string = _perform_api_call(api_call)
143+
except OpenMLServerNoResult:
144+
return []
142145
tasks_dict = xmltodict.parse(xml_string, force_list=('oml:task',))
143146
# Minimalistic check if the XML is useful
144147
if 'oml:tasks' not in tasks_dict:

openml/tasks/task.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from .. import config
55
from .. import datasets
66
from .split import OpenMLSplit
7-
from .._api_calls import _read_url
7+
from .._api_calls import _read_url, _perform_api_call
88

99

1010
class OpenMLTask(object):
@@ -92,6 +92,28 @@ def get_split_dimensions(self):
9292

9393
return self.split.repeats, self.split.folds, self.split.samples
9494

95+
def push_tag(self, tag):
96+
"""Annotates this task with a tag on the server.
97+
98+
Parameters
99+
----------
100+
tag : str
101+
Tag to attach to the task.
102+
"""
103+
data = {'task_id': self.task_id, 'tag': tag}
104+
_perform_api_call("/task/tag", data=data)
105+
106+
def remove_tag(self, tag):
107+
"""Removes a tag from this task on the server.
108+
109+
Parameters
110+
----------
111+
tag : str
112+
Tag to remove from the task.
113+
"""
114+
data = {'task_id': self.task_id, 'tag': tag}
115+
_perform_api_call("/task/untag", data=data)
116+
95117

96118
def _create_task_cache_dir(task_id):
97119
task_cache_dir = os.path.join(config.get_cache_directory(), "tasks", str(task_id))

tests/test_flows/test_flow.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ class TestFlow(TestBase):
3636
_multiprocess_can_split_ = True
3737

3838
def test_get_flow(self):
39-
# We need to use the production server here because 4024 is not the test
40-
# server
39+
# We need the production server here because flow 4024 is not on the
40+
# test server
4141
openml.config.server = self.production_server
4242

4343
flow = openml.flows.get_flow(4024)
@@ -67,6 +67,21 @@ def test_get_flow(self):
6767
self.assertEqual(subflow_3.parameters['L'], '-1')
6868
self.assertEqual(len(subflow_3.components), 0)
6969

70+
def test_tagging(self):
71+
flow_list = openml.flows.list_flows(size=1)
72+
flow_id = list(flow_list.keys())[0]
73+
flow = openml.flows.get_flow(flow_id)
74+
tag = "testing_tag_{}_{}".format(self.id(), time.time())
75+
flow_list = openml.flows.list_flows(tag=tag)
76+
self.assertEqual(len(flow_list), 0)
77+
flow.push_tag(tag)
78+
flow_list = openml.flows.list_flows(tag=tag)
79+
self.assertEqual(len(flow_list), 1)
80+
self.assertIn(flow_id, flow_list)
81+
flow.remove_tag(tag)
82+
flow_list = openml.flows.list_flows(tag=tag)
83+
self.assertEqual(len(flow_list), 0)
84+
7085
def test_from_xml_to_xml(self):
7186
# Get the raw xml thing
7287
# TODO maybe get this via get_flow(), which would have to be refactored to allow getting only the xml dictionary

tests/test_runs/test_run.py

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,44 @@
1+
from time import time
2+
13
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
24
from sklearn.linear_model import LogisticRegression
35
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
46

57
from openml.testing import TestBase
68
from openml.flows.sklearn_converter import sklearn_to_flow
79
from openml import OpenMLRun
10+
import openml
811

912

1013
class TestRun(TestBase):
11-
# Splitting not helpful, these test's don't rely on the server and take less
12-
# than 1 seconds
14+
# Splitting not helpful, these tests don't rely on the server and take
15+
# less than 1 second
1316

1417
def test_parse_parameters_flow_not_on_server(self):
1518

1619
model = LogisticRegression()
1720
flow = sklearn_to_flow(model)
18-
self.assertRaisesRegexp(ValueError,
19-
'Flow sklearn.linear_model.logistic.LogisticRegression '
20-
'has no flow_id!',
21-
OpenMLRun._parse_parameters, flow)
21+
self.assertRaisesRegexp(
22+
ValueError, 'Flow sklearn.linear_model.logistic.LogisticRegression'
23+
' has no flow_id!', OpenMLRun._parse_parameters, flow)
2224

2325
model = AdaBoostClassifier(base_estimator=LogisticRegression())
2426
flow = sklearn_to_flow(model)
2527
flow.flow_id = 1
26-
self.assertRaisesRegexp(ValueError,
27-
'Flow sklearn.linear_model.logistic.LogisticRegression '
28-
'has no flow_id!',
29-
OpenMLRun._parse_parameters, flow)
28+
self.assertRaisesRegexp(
29+
ValueError, 'Flow sklearn.linear_model.logistic.LogisticRegression'
30+
' has no flow_id!', OpenMLRun._parse_parameters, flow)
3031

3132
def test_parse_parameters(self):
3233

3334
model = RandomizedSearchCV(
3435
estimator=RandomForestClassifier(n_estimators=5),
35-
param_distributions={"max_depth": [3, None],
36-
"max_features": [1, 2, 3, 4],
37-
"min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
38-
"min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
39-
"bootstrap": [True, False],
40-
"criterion": ["gini", "entropy"]},
36+
param_distributions={
37+
"max_depth": [3, None],
38+
"max_features": [1, 2, 3, 4],
39+
"min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
40+
"min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
41+
"bootstrap": [True, False], "criterion": ["gini", "entropy"]},
4142
cv=StratifiedKFold(n_splits=2, random_state=1),
4243
n_iter=5)
4344
flow = sklearn_to_flow(model)
@@ -49,3 +50,16 @@ def test_parse_parameters(self):
4950
if parameter['oml:name'] == 'n_estimators':
5051
self.assertEqual(parameter['oml:value'], '5')
5152
self.assertEqual(parameter['oml:component'], 2)
53+
54+
def test_tagging(self):
55+
run = openml.runs.get_run(1)
56+
tag = "testing_tag_{}_{}".format(self.id(), time())
57+
run_list = openml.runs.list_runs(tag=tag)
58+
self.assertEqual(len(run_list), 0)
59+
run.push_tag(tag)
60+
run_list = openml.runs.list_runs(tag=tag)
61+
self.assertEqual(len(run_list), 1)
62+
self.assertIn(1, run_list)
63+
run.remove_tag(tag)
64+
run_list = openml.runs.list_runs(tag=tag)
65+
self.assertEqual(len(run_list), 0)

tests/test_tasks/test_task.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import sys
2-
import types
32

43
if sys.version_info[0] >= 3:
54
from unittest import mock
65
else:
76
import mock
87

8+
from time import time
99
import numpy as np
1010

1111
import openml
@@ -45,6 +45,19 @@ def test_get_X_and_Y(self):
4545
self.assertIsInstance(Y, np.ndarray)
4646
self.assertEqual(Y.dtype, float)
4747

48+
def test_tagging(self):
49+
task = openml.tasks.get_task(1)
50+
tag = "testing_tag_{}_{}".format(self.id(), time())
51+
task_list = openml.tasks.list_tasks(tag=tag)
52+
self.assertEqual(len(task_list), 0)
53+
task.push_tag(tag)
54+
task_list = openml.tasks.list_tasks(tag=tag)
55+
self.assertEqual(len(task_list), 1)
56+
self.assertIn(1, task_list)
57+
task.remove_tag(tag)
58+
task_list = openml.tasks.list_tasks(tag=tag)
59+
self.assertEqual(len(task_list), 0)
60+
4861
def test_get_train_and_test_split_indices(self):
4962
openml.config.set_cache_directory(self.static_cache_dir)
5063
task = openml.tasks.get_task(1882)
@@ -62,4 +75,3 @@ def test_get_train_and_test_split_indices(self):
6275
task.get_train_test_split_indices, 10, 0)
6376
self.assertRaisesRegexp(ValueError, "Repeat 10 not known",
6477
task.get_train_test_split_indices, 0, 10)
65-

0 commit comments

Comments
 (0)