@@ -596,29 +596,38 @@ def _extract_sklearn_param_info(self, model, char_lim=1024) -> Union[None, Dict]
596596 n = re .compile ("[.]*\n " , flags = IGNORECASE )
597597 lines = n .split (docstring )
598598 p = re .compile ("[a-z0-9_ ]+ : [a-z0-9_']+[a-z0-9_ ]*" , flags = IGNORECASE )
599- parameter_docs = OrderedDict () # type: Dict
600- description = [] # type: List
599+ # The above regular expression is designed to detect sklearn parameter names and type
600+ # in the format of [variable_name][space]:[space][type]
601+ # The expectation is that the parameter description for this detected parameter will
602+ # be all the lines in the docstring till the regex finds another parameter match
601603
602604 # collecting parameters and their descriptions
605+ description = [] # type: List
603606 for i , s in enumerate (lines ):
604607 param = p .findall (s )
605608 if param != []:
606- if len (description ) > 0 :
607- description [- 1 ] = '\n ' .join (description [- 1 ]).strip ()
608- if len (description [- 1 ]) > char_lim :
609- description [- 1 ] = "{}..." .format (description [- 1 ][:char_lim - 3 ])
610- description .append ([])
609+ # a parameter definition is found by regex
610+ # creating placeholder when parameter found which will be a list of strings
611+ # string descriptions will be appended in subsequent iterations
612+ # till another parameter is found and a new placeholder is created
613+ placeholder = ['' ] # type: List[str]
614+ description .append (placeholder )
611615 else :
612- if len (description ) > 0 :
616+ if len (description ) > 0 : # description=[] means no parameters found yet
617+ # appending strings to the placeholder created when parameter found
613618 description [- 1 ].append (s )
614- description [- 1 ] = '\n ' .join (description [- 1 ]).strip ()
615- if len (description [- 1 ]) > char_lim :
616- description [- 1 ] = "{}..." .format (description [- 1 ][:char_lim - 3 ])
619+ for i in range (len (description )):
620+ # concatenating parameter description strings
621+ description [i ] = '\n ' .join (description [i ]).strip ()
622+ # limiting all parameter descriptions to accepted OpenML string length
623+ if len (description [i ]) > char_lim :
624+ description [i ] = "{}..." .format (description [i ][:char_lim - 3 ])
617625
618626 # collecting parameters and their types
627+ parameter_docs = OrderedDict () # type: Dict
619628 matches = p .findall (docstring )
620629 for i , param in enumerate (matches ):
621- key , value = param .split (':' )
630+ key , value = str ( param ) .split (':' )
622631 parameter_docs [key .strip ()] = [value .strip (), description [i ]]
623632
624633 # to avoid KeyError for missing parameters
0 commit comments