Skip to content

Commit 235ded8

Browse files
committed
Leaner implementation for parameter docstring
1 parent 41549b0 commit 235ded8

1 file changed

Lines changed: 21 additions & 12 deletions

File tree

openml/extensions/sklearn/extension.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -596,29 +596,38 @@ def _extract_sklearn_param_info(self, model, char_lim=1024) -> Union[None, Dict]
596596
n = re.compile("[.]*\n", flags=IGNORECASE)
597597
lines = n.split(docstring)
598598
p = re.compile("[a-z0-9_ ]+ : [a-z0-9_']+[a-z0-9_ ]*", flags=IGNORECASE)
599-
parameter_docs = OrderedDict() # type: Dict
600-
description = [] # type: List
599+
# The above regular expression is designed to detect sklearn parameter names and types
600+
# in the format of [variable_name][space]:[space][type]
601+
# The expectation is that the parameter description for this detected parameter will
602+
# be all the lines in the docstring till the regex finds another parameter match
601603

602604
# collecting parameters and their descriptions
605+
description = [] # type: List
603606
for i, s in enumerate(lines):
604607
param = p.findall(s)
605608
if param != []:
606-
if len(description) > 0:
607-
description[-1] = '\n'.join(description[-1]).strip()
608-
if len(description[-1]) > char_lim:
609-
description[-1] = "{}...".format(description[-1][:char_lim - 3])
610-
description.append([])
609+
# a parameter definition is found by regex
610+
# create a placeholder (a list of strings) whenever a parameter is found
611+
# string descriptions will be appended in subsequent iterations
612+
# till another parameter is found and a new placeholder is created
613+
placeholder = [''] # type: List[str]
614+
description.append(placeholder)
611615
else:
612-
if len(description) > 0:
616+
if len(description) > 0: # description=[] means no parameters found yet
617+
# appending strings to the placeholder created when parameter found
613618
description[-1].append(s)
614-
description[-1] = '\n'.join(description[-1]).strip()
615-
if len(description[-1]) > char_lim:
616-
description[-1] = "{}...".format(description[-1][:char_lim - 3])
619+
for i in range(len(description)):
620+
# concatenating parameter description strings
621+
description[i] = '\n'.join(description[i]).strip()
622+
# limiting all parameter descriptions to accepted OpenML string length
623+
if len(description[i]) > char_lim:
624+
description[i] = "{}...".format(description[i][:char_lim - 3])
617625

618626
# collecting parameters and their types
627+
parameter_docs = OrderedDict() # type: Dict
619628
matches = p.findall(docstring)
620629
for i, param in enumerate(matches):
621-
key, value = param.split(':')
630+
key, value = str(param).split(':')
622631
parameter_docs[key.strip()] = [value.strip(), description[i]]
623632

624633
# to avoid KeyError for missing parameters

0 commit comments

Comments
 (0)