@@ -491,7 +491,7 @@ def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str:
491491 ----------
492492 model : sklearn model
493493 char_lim : int
494- Specifying the max length of the returned string
494+ Specifying the max length of the returned string.
495495 OpenML servers have a constraint of 1024 characters for the 'description' field.
496496
497497 Returns
@@ -508,6 +508,11 @@ def match_format(s):
508508 # trim till 'Read more'
509509 pattern = "Read more in the :ref:"
510510 index = s .index (pattern )
511+ s = s [:index ]
512+ # trimming docstring to be within char_lim
513+ if len (s ) > char_lim :
514+ s = "{}..." .format (s [:char_lim - 3 ])
515+ return s .strip ()
511516 except ValueError :
512517 pass
513518 try :
@@ -678,7 +683,7 @@ def match_format(s):
678683 print ("{} {}" .format (match_format ("Parameters" ), e ))
679684 return None
680685
681- headings = ["Attributes" , "See also" , "Note" , "References" ]
686+ headings = ["Attributes" , "Notes" , " See also" , "Note" , "References" ]
682687 for h in headings :
683688 try :
684689 # to find end of Parameters section
@@ -693,12 +698,15 @@ def match_format(s):
693698 s = s [index1 :index2 ]
694699 return s .strip ()
695700
696- def _extract_sklearn_param_info (self , model ) -> Union [None , Dict ]:
701+ def _extract_sklearn_param_info (self , model , char_lim = 1024 ) -> Union [None , Dict ]:
697702 '''Parses parameter type and description from sklearn docstring
698703
699704 Parameters
700705 ----------
701706 model : sklearn model
707+ char_lim : int
708+ Specifying the max length of the returned string.
709+ OpenML servers have a constraint of 1024 characters for string fields.
702710
703711 Returns
704712 -------
@@ -711,7 +719,7 @@ def _extract_sklearn_param_info(self, model) -> Union[None, Dict]:
711719
712720 n = re .compile ("[.]*\n " , flags = IGNORECASE )
713721 lines = n .split (docstring )
714- p = re .compile ("[a-z0-9_ ]+ : [a-z0-9_]+[a-z0-9_ ]*" , flags = IGNORECASE )
722+ p = re .compile ("[a-z0-9_ ]+ : [a-z0-9_' ]+[a-z0-9_ ]*" , flags = IGNORECASE )
715723 parameter_docs = OrderedDict () # type: Dict
716724 description = [] # type: List
717725
@@ -721,11 +729,15 @@ def _extract_sklearn_param_info(self, model) -> Union[None, Dict]:
721729 if param != []:
722730 if len (description ) > 0 :
723731 description [- 1 ] = '\n ' .join (description [- 1 ]).strip ()
732+ if len (description [- 1 ]) > char_lim :
733+ description [- 1 ] = "{}..." .format (description [- 1 ][:char_lim - 3 ])
724734 description .append ([])
725735 else :
726736 if len (description ) > 0 :
727737 description [- 1 ].append (s )
728738 description [- 1 ] = '\n ' .join (description [- 1 ]).strip ()
739+ if len (description [- 1 ]) > char_lim :
740+ description [- 1 ] = "{}..." .format (description [- 1 ][:char_lim - 3 ])
729741
730742 # collecting parameters and their types
731743 matches = p .findall (docstring )
0 commit comments