Skip to content

Commit 7c2fe27

Browse files
authored
Merge pull request #260 from openml/update_flow_parameter_checks
Change option to check for parameter names instead of not checking parameters at all
2 parents 12458a3 + 429f451 commit 7c2fe27

6 files changed

Lines changed: 111 additions & 14 deletions

File tree

openml/flows/flow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def __init__(self, name, description, model, components, parameters,
105105
keys_parameters = set(parameters.keys())
106106
keys_parameters_meta_info = set(parameters_meta_info.keys())
107107
if len(keys_parameters.difference(keys_parameters_meta_info)) > 0:
108-
raise ValueError('Parameter %s only in parameters, but not in'
108+
raise ValueError('Parameter %s only in parameters, but not in '
109109
'parameters_meta_info.' %
110110
str(keys_parameters.difference(
111111
keys_parameters_meta_info)))

openml/flows/functions.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,9 @@ def _check_flow_for_server_id(flow):
145145
stack.append(component)
146146

147147

148-
def assert_flows_equal(flow1, flow2, ignore_parameters_on_older_children=None,
149-
ignore_parameters=False):
148+
def assert_flows_equal(flow1, flow2,
149+
ignore_parameter_values_on_older_children=None,
150+
ignore_parameter_values=False):
150151
"""Check equality of two flows.
151152
152153
Two flows are equal if their all keys which are not set by the server
@@ -158,11 +159,11 @@ def assert_flows_equal(flow1, flow2, ignore_parameters_on_older_children=None,
158159
159160
flow2 : OpenMLFlow
160161
161-
ignore_parameters_on_older_children : str
162+
ignore_parameter_values_on_older_children : str
162163
If set to ``OpenMLFlow.upload_date``, ignores parameters in a child
163164
flow if it's upload date predates the upload date of the parent flow.
164165
165-
ignore_parameters : bool
166+
ignore_parameter_values : bool
166167
Whether to ignore parameter values when comparing flows.
167168
"""
168169
if not isinstance(flow1, OpenMLFlow):
@@ -196,22 +197,35 @@ def assert_flows_equal(flow1, flow2, ignore_parameters_on_older_children=None,
196197
raise ValueError('Component %s only available in '
197198
'argument2, but not in argument1.' % name)
198199
assert_flows_equal(attr1[name], attr2[name],
199-
ignore_parameters_on_older_children,
200-
ignore_parameters)
200+
ignore_parameter_values_on_older_children,
201+
ignore_parameter_values)
201202

202203
else:
203204
if key == 'parameters':
204-
if ignore_parameters_on_older_children:
205+
if ignore_parameter_values or \
206+
ignore_parameter_values_on_older_children:
207+
parameters_flow_1 = set(flow1.parameters.keys())
208+
parameters_flow_2 = set(flow2.parameters.keys())
209+
symmetric_difference = parameters_flow_1 ^ parameters_flow_2
210+
if len(symmetric_difference) > 0:
211+
raise ValueError('Flow %s: parameter set of flow '
212+
'differs from the parameters stored '
213+
'on the server.' % flow1.name)
214+
215+
if ignore_parameter_values_on_older_children:
205216
upload_date_current_flow = dateutil.parser.parse(
206217
flow1.upload_date)
207218
upload_date_parent_flow = dateutil.parser.parse(
208-
ignore_parameters_on_older_children)
219+
ignore_parameter_values_on_older_children)
209220
if upload_date_current_flow < upload_date_parent_flow:
210221
continue
211-
elif ignore_parameters:
222+
223+
if ignore_parameter_values:
224+
# Continue needs to be done here as the first if
225+
# statement triggers in both special cases
212226
continue
213227

214228
if attr1 != attr2:
215229
raise ValueError("Flow %s: values for attribute '%s' differ: "
216-
"'%s' vs '%s'." %
230+
"'%s'\nvs\n'%s'." %
217231
(str(flow1.name), str(key), str(attr1), str(attr2)))

openml/runs/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def _publish_flow_if_necessary(flow):
127127
server_flow = get_flow(flow_id)
128128
openml.flows.flow._copy_server_fields(server_flow, flow)
129129
openml.flows.assert_flows_equal(flow, server_flow,
130-
ignore_parameters=True)
130+
ignore_parameter_values=True)
131131
else:
132132
raise e
133133

tests/test_flows/test_flow.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,8 @@ def test_publish_error(self, api_call_mock, get_flow_mock):
205205
"New flow ID is 1. Please check manually and remove " \
206206
"the flow if necessary! Error is:\n" \
207207
"'Flow sklearn.ensemble.forest.RandomForestClassifier: values for attribute 'name' differ: " \
208-
"'sklearn.ensemble.forest.RandomForestClassifier' vs 'sklearn.ensemble.forest.RandomForestClassifie'.'"
208+
"'sklearn.ensemble.forest.RandomForestClassifier'" \
209+
"\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'"
209210

210211
self.assertEqual(context_manager.exception.args[0], fixture)
211212
self.assertEqual(api_call_mock.call_count, 2)

tests/test_flows/test_flow_functions.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,85 @@ def test_are_flows_equal(self):
111111
self.assertRaises(ValueError,
112112
openml.flows.functions.assert_flows_equal,
113113
parent_flow, new_flow)
114+
115+
def test_are_flows_equal_ignore_parameter_values(self):
116+
paramaters = OrderedDict((('a', 5), ('b', 6)))
117+
parameters_meta_info = OrderedDict((('a', None), ('b', None)))
118+
119+
flow = openml.flows.OpenMLFlow(name='Test',
120+
description='Test flow',
121+
model=None,
122+
components=OrderedDict(),
123+
parameters=paramaters,
124+
parameters_meta_info=parameters_meta_info,
125+
external_version='1',
126+
tags=['abc', 'def'],
127+
language='English',
128+
dependencies='abc',
129+
class_name='Test',
130+
custom_name='Test')
131+
132+
openml.flows.functions.assert_flows_equal(flow, flow)
133+
openml.flows.functions.assert_flows_equal(flow, flow,
134+
ignore_parameter_values=True)
135+
136+
new_flow = copy.deepcopy(flow)
137+
new_flow.parameters['a'] = 7
138+
self.assertRaisesRegexp(ValueError, "values for attribute 'parameters' "
139+
"differ: 'OrderedDict\(\[\('a', "
140+
"5\), \('b', 6\)\]\)'\nvs\n"
141+
"'OrderedDict\(\[\('a', 7\), "
142+
"\('b', 6\)\]\)'",
143+
openml.flows.functions.assert_flows_equal,
144+
flow, new_flow)
145+
openml.flows.functions.assert_flows_equal(flow, new_flow,
146+
ignore_parameter_values=True)
147+
148+
del new_flow.parameters['a']
149+
self.assertRaisesRegexp(ValueError, "values for attribute 'parameters' "
150+
"differ: 'OrderedDict\(\[\('a', "
151+
"5\), \('b', 6\)\]\)'\nvs\n"
152+
"'OrderedDict\(\[\('b', 6\)\]\)'",
153+
openml.flows.functions.assert_flows_equal,
154+
flow, new_flow)
155+
self.assertRaisesRegexp(ValueError, "Flow Test: parameter set of flow "
156+
"differs from the parameters stored "
157+
"on the server.",
158+
openml.flows.functions.assert_flows_equal,
159+
flow, new_flow, ignore_parameter_values=True)
160+
161+
def test_are_flows_equal_ignore_if_older(self):
162+
paramaters = OrderedDict((('a', 5), ('b', 6)))
163+
parameters_meta_info = OrderedDict((('a', None), ('b', None)))
164+
165+
flow = openml.flows.OpenMLFlow(name='Test',
166+
description='Test flow',
167+
model=None,
168+
components=OrderedDict(),
169+
parameters=paramaters,
170+
parameters_meta_info=parameters_meta_info,
171+
external_version='1',
172+
tags=['abc', 'def'],
173+
language='English',
174+
dependencies='abc',
175+
class_name='Test',
176+
custom_name='Test',
177+
upload_date='2017-01-31T12-01-01')
178+
179+
openml.flows.functions.assert_flows_equal(flow, flow,
180+
ignore_parameter_values_on_older_children='2017-01-31T12-01-01')
181+
openml.flows.functions.assert_flows_equal(flow, flow,
182+
ignore_parameter_values_on_older_children=None)
183+
new_flow = copy.deepcopy(flow)
184+
new_flow.parameters['a'] = 7
185+
self.assertRaises(ValueError, openml.flows.functions.assert_flows_equal,
186+
flow, new_flow, ignore_parameter_values_on_older_children='2017-01-31T12-01-01')
187+
self.assertRaises(ValueError, openml.flows.functions.assert_flows_equal,
188+
flow, new_flow, ignore_parameter_values_on_older_children=None)
189+
190+
new_flow.upload_date = '2016-01-31T12-01-01'
191+
self.assertRaises(ValueError, openml.flows.functions.assert_flows_equal,
192+
flow, new_flow,
193+
ignore_parameter_values_on_older_children='2017-01-31T12-01-01')
194+
openml.flows.functions.assert_flows_equal(flow, flow,
195+
ignore_parameter_values_on_older_children=None)

tests/test_runs/test_run_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def _check_serialized_optimized_run(self, run_id):
6767
try:
6868
model_prime = openml.runs.initialize_model_from_trace(run_id, 0, 0)
6969
except openml.exceptions.OpenMLServerException as e:
70-
e.additional += '; run_id: ' + run_id
70+
e.additional = str(e.additional) + '; run_id: ' + str(run_id)
7171
raise e
7272

7373
run_prime = openml.runs.run_model_on_task(task, model_prime,

0 commit comments

Comments
 (0)