Skip to content

Commit 6dc4345

Browse files
committed
Trimming of all strings to be uploaded
1 parent d90f333 commit 6dc4345

1 file changed

Lines changed: 16 additions & 4 deletions

File tree

openml/extensions/sklearn/extension.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str:
491491
----------
492492
model : sklearn model
493493
char_lim : int
494-
Specifying the max length of the returned string
494+
Specifying the max length of the returned string.
495495
OpenML servers have a constraint of 1024 characters for the 'description' field.
496496
497497
Returns
@@ -508,6 +508,11 @@ def match_format(s):
508508
# trim till 'Read more'
509509
pattern = "Read more in the :ref:"
510510
index = s.index(pattern)
511+
s = s[:index]
512+
# trimming docstring to be within char_lim
513+
if len(s) > char_lim:
514+
s = "{}...".format(s[:char_lim - 3])
515+
return s.strip()
511516
except ValueError:
512517
pass
513518
try:
@@ -678,7 +683,7 @@ def match_format(s):
678683
print("{} {}".format(match_format("Parameters"), e))
679684
return None
680685

681-
headings = ["Attributes", "See also", "Note", "References"]
686+
headings = ["Attributes", "Notes", "See also", "Note", "References"]
682687
for h in headings:
683688
try:
684689
# to find end of Parameters section
@@ -693,12 +698,15 @@ def match_format(s):
693698
s = s[index1:index2]
694699
return s.strip()
695700

696-
def _extract_sklearn_param_info(self, model) -> Union[None, Dict]:
701+
def _extract_sklearn_param_info(self, model, char_lim=1024) -> Union[None, Dict]:
697702
'''Parses parameter type and description from sklearn docstring
698703
699704
Parameters
700705
----------
701706
model : sklearn model
707+
char_lim : int
708+
Specifying the max length of the returned string.
709+
OpenML servers have a constraint of 1024 characters for string fields.
702710
703711
Returns
704712
-------
@@ -711,7 +719,7 @@ def _extract_sklearn_param_info(self, model) -> Union[None, Dict]:
711719

712720
n = re.compile("[.]*\n", flags=IGNORECASE)
713721
lines = n.split(docstring)
714-
p = re.compile("[a-z0-9_ ]+ : [a-z0-9_]+[a-z0-9_ ]*", flags=IGNORECASE)
722+
p = re.compile("[a-z0-9_ ]+ : [a-z0-9_']+[a-z0-9_ ]*", flags=IGNORECASE)
715723
parameter_docs = OrderedDict() # type: Dict
716724
description = [] # type: List
717725

@@ -721,11 +729,15 @@ def _extract_sklearn_param_info(self, model) -> Union[None, Dict]:
721729
if param != []:
722730
if len(description) > 0:
723731
description[-1] = '\n'.join(description[-1]).strip()
732+
if len(description[-1]) > char_lim:
733+
description[-1] = "{}...".format(description[-1][:char_lim - 3])
724734
description.append([])
725735
else:
726736
if len(description) > 0:
727737
description[-1].append(s)
728738
description[-1] = '\n'.join(description[-1]).strip()
739+
if len(description[-1]) > char_lim:
740+
description[-1] = "{}...".format(description[-1][:char_lim - 3])
729741

730742
# collecting parameters and their types
731743
matches = p.findall(docstring)

0 commit comments

Comments
 (0)