Skip to content

Commit 7cd5741

Browse files
committed
FIX serialize each parameter as json
1 parent c4f73e1 commit 7cd5741

5 files changed

Lines changed: 45 additions & 30 deletions

File tree

openml/flows/sklearn.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ def serialize_object(self, o):
3737
# The check for bool has to be before the check for int, otherwise,
3838
# isinstance will think the bool is an int and the bool will
3939
# be converted to a string which can't be parsed by json.loads
40-
rval = json.dumps(o)
40+
rval = o
4141
elif isinstance(o, int):
42-
rval = repr(o)
42+
rval = o
4343
elif isinstance(o, float):
44-
rval = repr(o)
44+
rval = o
4545
elif isinstance(o, dict):
4646
rval = {}
4747
for key, value in o.items():
@@ -51,7 +51,7 @@ def serialize_object(self, o):
5151
key = self.serialize_object(key)
5252
value = self.serialize_object(value)
5353
rval[key] = value
54-
rval = json.dumps(rval)
54+
rval = rval
5555
elif isinstance(o, type):
5656
rval = self.serialize_type(o)
5757
elif isinstance(o, scipy.stats.distributions.rv_frozen):
@@ -187,7 +187,7 @@ def serialize_model(self, model):
187187
'value': {'key': k,
188188
'step_name': None}}
189189
component_reference = self.serialize_object(component_reference)
190-
parameters[k] = (component_reference)
190+
parameters[k] = json.dumps(component_reference)
191191

192192
else:
193193
# Since Pipeline and FeatureUnion also return estimators and
@@ -201,6 +201,7 @@ def serialize_model(self, model):
201201

202202
# a regular hyperparameter
203203
if not (hasattr(rval, '__len__') and len(rval) == 0):
204+
rval = json.dumps(rval)
204205
parameters[k] = rval
205206
else:
206207
parameters[k] = None
@@ -279,9 +280,8 @@ def serialize_type(self, o):
279280
np.int: 'np.int',
280281
np.int32: 'np.int32',
281282
np.int64: 'np.int64'}
282-
jason = json.dumps({'oml:serialized_object': 'type',
283-
'value': mapping[o]})
284-
return jason
283+
return {'oml:serialized_object': 'type',
284+
'value': mapping[o]}
285285

286286
def deserialize_type(self, o, **kwargs):
287287
mapping = {'float': float,
@@ -300,9 +300,8 @@ def serialize_rv_frozen(self, o):
300300
a = o.a
301301
b = o.b
302302
dist = o.dist.__class__.__module__ + '.' + o.dist.__class__.__name__
303-
jason = json.dumps({'oml:serialized_object': 'rv_frozen',
304-
'value': {'dist': dist, 'a': a, 'b': b, 'args': args, 'kwds': kwds}})
305-
return jason
303+
return {'oml:serialized_object': 'rv_frozen',
304+
'value': {'dist': dist, 'a': a, 'b': b, 'args': args, 'kwds': kwds}}
306305

307306
def deserialize_rv_frozen(self, o, **kwargs):
308307
args = o['args']
@@ -327,9 +326,8 @@ def deserialize_rv_frozen(self, o, **kwargs):
327326

328327
def serialize_function(self, o):
329328
name = o.__module__ + '.' + o.__name__
330-
jason = json.dumps({'oml:serialized_object': 'function',
331-
'value': name})
332-
return jason
329+
return {'oml:serialized_object': 'function',
330+
'value': name}
333331

334332
def deserialize_function(self, name, **kwargs):
335333
module_name = name.rsplit('.', 1)

openml/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def setUp(self):
4444
openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de"
4545

4646
self.production_server = openml.config.server
47-
self.test_server = "http://test.openml.org/api/v1/xml"
47+
self.test_server = "http://capa.win.tue.nl/api/v1/xml"
4848
openml.config.server = self.test_server
4949
openml.config.set_cache_directory(self.workdir, self.workdir)
5050

tests/flows/test_flow.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def test_publish_flow(self, name_mock):
114114

115115
flow = openml.OpenMLFlow(name='Test',
116116
description="test description",
117+
external_version=str(sklearn.__version__),
117118
model=sklearn.dummy.DummyClassifier())
118119
name_mock.return_value = 'TEST%s%s' % (sentinel, flow.name)
119120

@@ -128,6 +129,12 @@ def test_sklearn_to_upload_to_flow(self, name_mock):
128129
md5 = hashlib.md5()
129130
md5.update(str(time.time()).encode('utf-8'))
130131
sentinel = md5.hexdigest()[:10]
132+
def side_effect(self):
133+
if sentinel in self.name:
134+
return self.name
135+
else:
136+
return 'TEST%s%s' % (sentinel, self.name)
137+
name_mock.side_effect = side_effect
131138

132139
# Test a more complicated flow
133140
scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
@@ -141,7 +148,6 @@ def test_sklearn_to_upload_to_flow(self, name_mock):
141148
rs = sklearn.model_selection.RandomizedSearchCV(
142149
estimator=model, param_distributions=parameter_grid)
143150
flow = openml.flows.create_flow_from_model(rs, SklearnToFlowConverter())
144-
name_mock.return_value = 'TEST%s%s' % (sentinel, flow.name)
145151

146152
flow.publish()
147153
self.assertIsInstance(flow.flow_id, int)

tests/flows/test_sklearn.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import unittest
44

55
import numpy as np
6+
import sklearn.base
67
import sklearn.datasets
78
import scipy.stats
89
import sklearn.decomposition
@@ -18,6 +19,13 @@
1819
from openml.flows import OpenMLFlow
1920

2021

22+
class Model(sklearn.base.BaseEstimator):
23+
def __init__(self, boolean, integer, floating_point_value):
24+
self.boolean = boolean
25+
self.integer = integer
26+
self.floating_point_value = floating_point_value
27+
28+
2129
class TestSklearn(unittest.TestCase):
2230

2331
def setUp(self):
@@ -34,18 +42,18 @@ def test_serialize_model(self):
3442
fixture_name = 'sklearn.tree.tree.DecisionTreeClassifier'
3543
fixture_description = 'Automatically created sub-component.'
3644
fixture_parameters = \
37-
OrderedDict((('class_weight', None),
38-
('criterion', 'entropy'),
39-
('max_depth', None),
40-
('max_features', 'auto'),
45+
OrderedDict((('class_weight', 'null'),
46+
('criterion', '"entropy"'),
47+
('max_depth', 'null'),
48+
('max_features', '"auto"'),
4149
('max_leaf_nodes', '2000'),
4250
('min_impurity_split', '1e-07'),
4351
('min_samples_leaf', '1'),
4452
('min_samples_split', '2'),
4553
('min_weight_fraction_leaf', '0.0'),
4654
('presort', 'false'),
47-
('random_state', None),
48-
('splitter', 'best')))
55+
('random_state', 'null'),
56+
('splitter', '"best"')))
4957

5058
serialization = self.converter.serialize_object(model)
5159

@@ -73,7 +81,7 @@ def test_serialize_model_with_subcomponent(self):
7381

7482
self.assertEqual(serialization.name, fixture_name)
7583
self.assertEqual(serialization.description, fixture_description)
76-
self.assertEqual(serialization.parameters['algorithm'], 'SAMME.R')
84+
self.assertEqual(serialization.parameters['algorithm'], '"SAMME.R"')
7785
self.assertIsInstance(serialization.parameters['base_estimator'], str)
7886
self.assertEqual(serialization.parameters['learning_rate'], '1.0')
7987
self.assertEqual(serialization.parameters['n_estimators'], '100')
@@ -294,11 +302,14 @@ def test_serialize_resampling(self):
294302
self.assertIsNot(deserialized, kfold)
295303

296304
def test_hypothetical_parameter_values(self):
297-
values = ['true', '1', '0.1']
298-
for value in values:
299-
serialized = self.converter.serialize_object(value)
300-
deserialized = self.converter.deserialize_object(value)
301-
self.assertEqual(deserialized, value)
305+
# Can only be checked inside a model
306+
307+
model = Model('true', '1', '0.1')
308+
309+
serialized = self.converter.serialize_object(model)
310+
deserialized = self.converter.deserialize_object(serialized)
311+
self.assertEqual(deserialized.get_params(), model.get_params())
312+
self.assertIsNot(deserialized, model)
302313

303314

304315

tests/tasks/test_task_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ def _check_task(self, task):
5252
def test_list_tasks_by_type(self):
5353
ttid=3
5454
tasks = openml.tasks.list_tasks(task_type_id=ttid)
55-
self.assertGreaterEqual(len(tasks), 300)
55+
self.assertGreaterEqual(len(tasks), 286)
5656
for tid in tasks:
5757
print(tasks[tid])
5858
self.assertEquals(ttid, tasks[tid]["ttid"])
5959
self._check_task(tasks[tid])
6060

6161
def test_list_tasks_by_tag(self):
6262
tasks = openml.tasks.list_tasks(tag='basic')
63-
self.assertGreaterEqual(len(tasks), 57)
63+
self.assertGreaterEqual(len(tasks), 54)
6464
for tid in tasks:
6565
self._check_task(tasks[tid])
6666

0 commit comments

Comments
 (0)