Skip to content

Commit 0b41df9

Browse files
committed
better handling of bootstrap plotting when infinities are present (#72)
1 parent 55e1f9c commit 0b41df9

4 files changed

Lines changed: 39 additions & 5 deletions

File tree

dabest/_classes.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ def __init__(self, control, test, effect_size,
460460
'statistic_wilcoxon': nan}
461461
"""
462462

463-
from numpy import array, isnan
463+
from numpy import array, isnan, isinf
464464
from numpy import sort as npsort
465465
from numpy.random import choice, seed
466466

@@ -522,6 +522,20 @@ def __init__(self, control, test, effect_size,
522522
control, test, is_paired, effect_size,
523523
resamples, random_seed)
524524
self.__bootstraps = npsort(bootstraps)
525+
526+
# Added in v0.2.6.
527+
# Raises a UserWarning if there are any infinities in the bootstraps.
528+
num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)])
529+
530+
if num_infinities > 0:
531+
warn_msg = "There are {} bootstrap(s) that are not defined. "\
532+
"This is likely due to smaple sample sizes. "\
533+
"The values in a bootstrap for a group will be more likely "\
534+
"to be all equal, with a resulting variance of zero. "\
535+
"The computation of Cohen's d and Hedges' g thus "\
536+
"involved a division by zero. "
537+
warnings.warn(warn_msg.format(num_infinities),
538+
category=UserWarning)
525539

526540
self.__bias_correction = ci2g.compute_meandiff_bias_correction(
527541
self.__bootstraps, self.__difference)

dabest/_stats_tools/confint_2group_diff.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,24 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,
159159

160160
# reset seed
161161
np.random.seed()
162-
162+
163+
# check whether there are any infinities in the bootstrap,
164+
# which likely indicates the sample sizes are too small as
165+
# the computation of Cohen's d and Hedges' g necessitated
166+
# a division by zero.
167+
# Added in v0.2.6.
168+
169+
# num_infinities = len(out[np.isinf(out)])
170+
# print(num_infinities)
171+
# if num_infinities > 0:
172+
# warn_msg = "There are {} bootstraps that are not defined. "\
173+
# "This is likely due to small sample sizes. "\
174+
# "The values in a bootstrap for a group will be more likely "\
175+
# "to be all equal, with a resulting variance of zero. "\
176+
# "The computation of Cohen's d and Hedges' g will therefore "\
177+
# "involved a division by zero. "
178+
# warnings.warn(warn_msg.format(num_infinities), category="UserWarning")
179+
163180
return out
164181

165182

dabest/_stats_tools/effsize.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,11 +217,13 @@ def cohens_d(control, test, is_paired=False):
217217
# assume the two arrays are ordered already.
218218
delta = test - control
219219
M = np.mean(delta)
220-
return M / average_sd
220+
divisor = average_sd
221221

222222
else:
223223
M = np.mean(test) - np.mean(control)
224-
return M / pooled_sd
224+
divisor = pooled_sd
225+
226+
return M / divisor
225227

226228

227229

dabest/plotter.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
471471
current_ci_high = results.bca_high[j]
472472

473473
# Create the violinplot.
474-
v = contrast_axes.violinplot(current_bootstrap,
474+
# New in v0.2.6: drop infinities (positive and negative) before plotting.
475+
v = contrast_axes.violinplot(current_bootstrap[~np.isinf(current_bootstrap)],
475476
positions=[tick],
476477
**violinplot_kwargs)
477478
# Turn the violinplot into half, and color it the same as the swarmplot.

0 commit comments

Comments
 (0)