Skip to content

Commit 9b5d382

Browse files
committed
Making suggested changes
1 parent 235ded8 commit 9b5d382

3 files changed

Lines changed: 78 additions & 23 deletions

File tree

openml/extensions/sklearn/extension.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -503,9 +503,6 @@ def match_format(s):
503503
s = inspect.getdoc(model)
504504
if s is None:
505505
return ''
506-
if len(s) <= char_lim:
507-
# if the fetched docstring is smaller than char_lim, no trimming required
508-
return s.strip()
509506
try:
510507
# trim till 'Read more'
511508
pattern = "Read more in the :ref:"
@@ -516,13 +513,16 @@ def match_format(s):
516513
s = "{}...".format(s[:char_lim - 3])
517514
return s.strip()
518515
except ValueError:
516+
logging.info("'Read more' not found in descriptions. "
517+
"Trying to trim till 'Parameters' if available in docstring.")
519518
pass
520519
try:
521520
# if 'Read more' doesn't exist, trim till 'Parameters'
522521
pattern = "Parameters"
523522
index = s.index(match_format(pattern))
524523
except ValueError:
525524
# returning full docstring
525+
logging.info("'Parameters' not found in docstring. Omitting docstring trimming.")
526526
index = len(s)
527527
s = s[:index]
528528
# trimming docstring to be within char_lim
@@ -556,7 +556,7 @@ def match_format(s):
556556
index1 = s.index(match_format("Parameters"))
557557
except ValueError as e:
558558
# when sklearn docstring has no 'Parameters' section
559-
print("{} {}".format(match_format("Parameters"), e))
559+
logging.info("{} {}".format(match_format("Parameters"), e))
560560
return None
561561

562562
headings = ["Attributes", "Notes", "See also", "Note", "References"]
@@ -566,7 +566,7 @@ def match_format(s):
566566
index2 = s.index(match_format(h))
567567
break
568568
except ValueError:
569-
print("{} not available in docstring".format(h))
569+
logging.info("{} not available in docstring".format(h))
570570
continue
571571
else:
572572
# in the case only 'Parameters' exist, trim till end of docstring
@@ -909,10 +909,6 @@ def flatten_all(list_):
909909
parameters[k] = None
910910

911911
if parameters_docs is not None:
912-
# print(type(model))
913-
# print(sorted(parameters_docs.keys()))
914-
# print(sorted(model_parameters.keys()))
915-
# print()
916912
data_type, description = parameters_docs[k]
917913
parameters_meta_info[k] = OrderedDict((('description', description),
918914
('data_type', data_type)))

openml/flows/functions.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,8 @@ def _check_flow_for_server_id(flow: OpenMLFlow) -> None:
308308
def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow,
309309
ignore_parameter_values_on_older_children: str = None,
310310
ignore_parameter_values: bool = False,
311-
ignore_custom_name_if_none: bool = False) -> None:
311+
ignore_custom_name_if_none: bool = False,
312+
check_description: bool = True) -> None:
312313
"""Check equality of two flows.
313314
314315
Two flows are equal if their all keys which are not set by the server
@@ -327,8 +328,11 @@ def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow,
327328
ignore_parameter_values : bool
328329
Whether to ignore parameter values when comparing flows.
329330
330-
ignore_custom_name_if_none : bool
331+
ignore_custom_name_if_none : bool
331332
Whether to ignore the custom name field if either flow has `custom_name` equal to `None`.
333+
334+
check_description : bool
335+
Whether to compare the descriptions of the two flows; if False, differing descriptions are ignored. (Note: the parameter name says "check", so describing it as "whether to ignore" inverts its meaning.)
332336
"""
333337
if not isinstance(flow1, OpenMLFlow):
334338
raise TypeError('Argument 1 must be of type OpenMLFlow, but is %s' %
@@ -366,7 +370,7 @@ def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow,
366370
ignore_custom_name_if_none)
367371
elif key == '_extension':
368372
continue
369-
elif key == 'description':
373+
elif check_description and key == 'description':
370374
# to ignore matching of descriptions since sklearn based flows may have
371375
# altering docstrings and is not guaranteed to be consistent
372376
continue
@@ -404,8 +408,8 @@ def assert_flows_equal(flow1: OpenMLFlow, flow2: OpenMLFlow,
404408
elif key == 'parameters_meta_info':
405409
# this value is a dictionary where each key is a parameter name, containing another
406410
# dictionary with keys specifying the parameter's 'description' and 'data_type'
407-
# check of descriptions can be ignored since that might change
408-
# data type check can be ignored if one of them is not defined, i.e., None
411+
# the comparison of parameter descriptions can be skipped since they might change
412+
# data type check can also be ignored if one of them is not defined, i.e., None
409413
params1 = set(flow1.parameters_meta_info.keys())
410414
params2 = set(flow2.parameters_meta_info.keys())
411415
if params1 != params2:

tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def test_serialize_model(self):
7575

7676
fixture_name = 'sklearn.tree.tree.DecisionTreeClassifier'
7777
fixture_short_name = 'sklearn.DecisionTreeClassifier'
78-
fixture_description = self.extension._get_sklearn_description(model)
78+
# str obtained from self.extension._get_sklearn_description(model)
79+
fixture_description = 'A decision tree classifier.'
7980
version_fixture = 'sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9' \
8081
% sklearn.__version__
8182
# min_impurity_decrease has been introduced in 0.20
@@ -143,7 +144,8 @@ def test_serialize_model_clustering(self):
143144

144145
fixture_name = 'sklearn.cluster.k_means_.KMeans'
145146
fixture_short_name = 'sklearn.KMeans'
146-
fixture_description = self.extension._get_sklearn_description(model)
147+
# str obtained from self.extension._get_sklearn_description(model)
148+
fixture_description = 'K-Means clustering'
147149
version_fixture = 'sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9' \
148150
% sklearn.__version__
149151
# n_jobs default has changed to None in 0.20
@@ -207,11 +209,18 @@ def test_serialize_model_with_subcomponent(self):
207209
'(base_estimator=sklearn.tree.tree.DecisionTreeClassifier)'
208210
fixture_class_name = 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'
209211
fixture_short_name = 'sklearn.AdaBoostClassifier'
210-
fixture_description = self.extension._get_sklearn_description(model)
212+
# str obtained from self.extension._get_sklearn_description(model)
213+
fixture_description = 'An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a '\
214+
'meta-estimator that begins by fitting a\nclassifier on the original'\
215+
' dataset and then fits additional copies of the\nclassifier on the '\
216+
'same dataset but where the weights of incorrectly\nclassified '\
217+
'instances are adjusted such that subsequent classifiers focus\nmore'\
218+
' on difficult cases.\n\nThis class implements the algorithm known '\
219+
'as AdaBoost-SAMME [2].'
211220
fixture_subcomponent_name = 'sklearn.tree.tree.DecisionTreeClassifier'
212221
fixture_subcomponent_class_name = 'sklearn.tree.tree.DecisionTreeClassifier'
213-
fixture_subcomponent_description = \
214-
self.extension._get_sklearn_description(model.base_estimator)
222+
# str obtained from self.extension._get_sklearn_description(model.base_estimator)
223+
fixture_subcomponent_description = 'A decision tree classifier.'
215224
fixture_structure = {
216225
fixture_name: [],
217226
'sklearn.tree.tree.DecisionTreeClassifier': ['base_estimator']
@@ -265,7 +274,20 @@ def test_serialize_pipeline(self):
265274
'scaler=sklearn.preprocessing.data.StandardScaler,' \
266275
'dummy=sklearn.dummy.DummyClassifier)'
267276
fixture_short_name = 'sklearn.Pipeline(StandardScaler,DummyClassifier)'
268-
fixture_description = self.extension._get_sklearn_description(model)
277+
# str obtained from self.extension._get_sklearn_description(model)
278+
fixture_description = "Pipeline of transforms with a final estimator.\n\nSequentially " \
279+
"apply a list of transforms and a final estimator.\nIntermediate "\
280+
"steps of the pipeline must be 'transforms', that is, they\nmust "\
281+
"implement fit and transform methods.\nThe final estimator only "\
282+
"needs to implement fit.\nThe transformers in the pipeline can be "\
283+
"cached using ``memory`` argument.\n\nThe purpose of the pipeline is"\
284+
" to assemble several steps that can be\ncross-validated together "\
285+
"while setting different parameters.\nFor this, it enables setting "\
286+
"parameters of the various steps using their\nnames and the "\
287+
"parameter name separated by a '__', as in the example below.\nA "\
288+
"step's estimator may be replaced entirely by setting the "\
289+
"parameter\nwith its name to another estimator, or a transformer "\
290+
"removed by setting\nit to 'passthrough' or ``None``."
269291
fixture_structure = {
270292
fixture_name: [],
271293
'sklearn.preprocessing.data.StandardScaler': ['scaler'],
@@ -354,7 +376,20 @@ def test_serialize_pipeline_clustering(self):
354376
'scaler=sklearn.preprocessing.data.StandardScaler,' \
355377
'clusterer=sklearn.cluster.k_means_.KMeans)'
356378
fixture_short_name = 'sklearn.Pipeline(StandardScaler,KMeans)'
357-
fixture_description = self.extension._get_sklearn_description(model)
379+
# str obtained from self.extension._get_sklearn_description(model)
380+
fixture_description = "Pipeline of transforms with a final estimator.\n\nSequentially "\
381+
"apply a list of transforms and a final estimator.\nIntermediate "\
382+
"steps of the pipeline must be 'transforms', that is, they\nmust "\
383+
"implement fit and transform methods.\nThe final estimator only "\
384+
"needs to implement fit.\nThe transformers in the pipeline can be "\
385+
"cached using ``memory`` argument.\n\nThe purpose of the pipeline is"\
386+
" to assemble several steps that can be\ncross-validated together "\
387+
"while setting different parameters.\nFor this, it enables setting "\
388+
"parameters of the various steps using their\nnames and the "\
389+
"parameter name separated by a '__', as in the example below.\nA "\
390+
"step's estimator may be replaced entirely by setting the parameter"\
391+
"\nwith its name to another estimator, or a transformer removed "\
392+
"by setting\nit to 'passthrough' or ``None``."
358393
fixture_structure = {
359394
fixture_name: [],
360395
'sklearn.preprocessing.data.StandardScaler': ['scaler'],
@@ -446,7 +481,14 @@ def test_serialize_column_transformer(self):
446481
'numeric=sklearn.preprocessing.data.StandardScaler,' \
447482
'nominal=sklearn.preprocessing._encoders.OneHotEncoder)'
448483
fixture_short_name = 'sklearn.ColumnTransformer'
449-
fixture_description = self.extension._get_sklearn_description(model)
484+
# str obtained from self.extension._get_sklearn_description(model)
485+
fixture_description = 'Applies transformers to columns of an array or pandas DataFrame.\n' \
486+
'\nThis estimator allows different columns or column subsets of the '\
487+
'input\nto be transformed separately and the features generated by '\
488+
'each transformer\nwill be concatenated to form a single feature '\
489+
'space.\nThis is useful for heterogeneous or columnar data, to '\
490+
'combine several\nfeature extraction mechanisms or transformations '\
491+
'into a single transformer.'
450492
fixture_structure = {
451493
fixture: [],
452494
'sklearn.preprocessing.data.StandardScaler': ['numeric'],
@@ -505,7 +547,20 @@ def test_serialize_column_transformer_pipeline(self):
505547
fixture_name: [],
506548
}
507549

508-
fixture_description = self.extension._get_sklearn_description(model)
550+
# str obtained from self.extension._get_sklearn_description(model)
551+
fixture_description = "Pipeline of transforms with a final estimator.\n\nSequentially "\
552+
"apply a list of transforms and a final estimator.\nIntermediate "\
553+
"steps of the pipeline must be 'transforms', that is, they\nmust "\
554+
"implement fit and transform methods.\nThe final estimator only "\
555+
"needs to implement fit.\nThe transformers in the pipeline can be "\
556+
"cached using ``memory`` argument.\n\nThe purpose of the pipeline "\
557+
"is to assemble several steps that can be\ncross-validated together "\
558+
"while setting different parameters.\nFor this, it enables setting "\
559+
"parameters of the various steps using their\nnames and the "\
560+
"parameter name separated by a '__', as in the example below.\nA "\
561+
"step's estimator may be replaced entirely by setting the parameter"\
562+
"\nwith its name to another estimator, or a transformer removed by "\
563+
"setting\nit to 'passthrough' or ``None``."
509564
serialization = self.extension.model_to_flow(model)
510565
structure = serialization.get_structure('name')
511566
self.assertEqual(serialization.name, fixture_name)

0 commit comments

Comments
 (0)