Skip to content

Commit 4673a18

Browse files
committed
Fix issue ACCLAB/dabestr#48: bug in calculating 95% CIs for paired differences
1 parent 4f59fdb commit 4673a18

2 files changed

Lines changed: 72 additions & 28 deletions

File tree

dabest/_stats_tools/confint_1group.py

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,28 @@
22
# -*-coding: utf-8 -*-
33
# Author: Joses Ho
44
# Email : joseshowh@gmail.com
5+
"""
6+
A range of functions to compute bootstraps for a single sample.
7+
"""
58

9+
def create_bootstrap_indexes(array, resamples=5000, random_seed=12345):
    """Given an array-like, return a generator of bootstrap indexes
    to be used for resampling.

    Parameters
    ----------
    array : array-like
        Only ``len(array)`` is used; the values themselves are not read.
    resamples : int, default 5000
        Number of index arrays the generator will yield.
    random_seed : int or None, default 12345
        Seed for the random number generator that draws the indexes.
        The same seed always produces the same sequence of index arrays.

    Returns
    -------
    generator
        Lazily yields ``resamples`` numpy arrays, each holding
        ``len(array)`` integer positions in ``[0, len(array))`` drawn
        with replacement.
    """
    import numpy as np

    n = len(array)
    indexes = np.arange(n)

    # The returned generator is lazy: no index is drawn until the caller
    # iterates over it. Seeding (and then re-seeding) the *global* numpy RNG
    # inside this function therefore has no effect on the draws, because they
    # all happen after this function has returned. A private RandomState,
    # captured by the generator, keeps the draws reproducible no matter when
    # the generator is consumed, and leaves the global RNG state untouched.
    rng = np.random.RandomState(random_seed)

    return (rng.choice(indexes, n, replace=True)
            for _ in range(0, resamples))
727

828
def compute_1group_jackknife(x, func, *args, **kwargs):
929
"""
@@ -20,22 +40,6 @@ def compute_1group_jackknife(x, func, *args, **kwargs):
2040
def compute_1group_acceleration(jack_dist):
    """Pass ``jack_dist`` to ``_calc_accel`` from the sibling
    ``confint_2group_diff`` module and return its result unchanged.
    """
    # Function-scope import, matching the other functions in this module.
    from .confint_2group_diff import _calc_accel

    return _calc_accel(jack_dist)
23-
24-
25-
26-
def _create_bootstrap_indexes(array, resamples=5000):
    """Build a lazy generator of bootstrap index arrays.

    Only the length of ``array`` is used. Each item yielded is a numpy
    array of ``len(array)`` positions drawn with replacement from
    ``range(len(array))`` using numpy's global random state; the generator
    yields ``resamples`` such arrays.
    """
    import numpy as np

    n_obs = len(array)
    positions = range(n_obs)

    # Nothing is drawn here; sampling happens as the caller iterates.
    return (
        np.random.choice(positions, size=n_obs, replace=True)
        for _ in range(resamples)
    )
38-
3943

4044

4145

@@ -49,8 +53,9 @@ def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345,
4953
np.random.seed(random_seed)
5054

5155
# Create bootstrap indexes.
52-
boot_indexes = _create_bootstrap_indexes(x, resamples)
53-
56+
boot_indexes = create_bootstrap_indexes(x, resamples=resamples,
57+
random_seed=random_seed)
58+
5459
out = [func(x[b], *args, **kwargs) for b in boot_indexes]
5560

5661
del boot_indexes
@@ -123,11 +128,13 @@ def summary_ci_1group(x, func, resamples=5000, alpha=0.05, random_seed=12345,
123128
from . import confint_2group_diff as ci2g
124129
from numpy import sort as npsort
125130

126-
boots = compute_1group_bootstraps(x, func, resamples, random_seed)
127-
bias = compute_1group_bias_correction(x, boots, func)
131+
boots = compute_1group_bootstraps(x, func, resamples=resamples,
132+
random_seed=random_seed,
133+
*args, **kwargs)
134+
bias = compute_1group_bias_correction(x, boots, func)
128135

129-
jk = compute_1group_jackknife(x, func)
130-
accel = ci2g._calc_accel(jk)
136+
jk = compute_1group_jackknife(x, func, *args, **kwargs)
137+
accel = compute_1group_acceleration(jk)
131138
del jk
132139

133140
ci_idx = ci2g.compute_interval_limits(bias, accel, resamples, alpha)

dabest/_stats_tools/confint_2group_diff.py

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
# -*-coding: utf-8 -*-
33
# Author: Joses Ho
44
# Email : joseshowh@gmail.com
5-
6-
5+
"""
6+
A range of functions to compute bootstraps for the mean difference
7+
between two groups.
8+
"""
79

810
def create_jackknife_indexes(data):
911
"""
@@ -103,9 +105,34 @@ def _calc_accel(jack_dist):
103105

104106

105107

108+
# def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,
109+
# resamples=5000, random_seed=12345):
110+
# """Bootstraps the effect_size for 2 groups."""
111+
# from . import effsize as __es
112+
# import numpy as np
113+
#
114+
# np.random.seed(random_seed)
115+
#
116+
# out = np.repeat(np.nan, resamples)
117+
# x0_len = len(x0)
118+
# x1_len = len(x1)
119+
#
120+
# for i in range(int(resamples)):
121+
# x0_boot = np.random.choice(x0, x0_len, replace=True)
122+
# x1_boot = np.random.choice(x1, x1_len, replace=True)
123+
# out[i] = __es.two_group_difference(x0_boot, x1_boot,
124+
# is_paired, effect_size)
125+
#
126+
# # reset seed
127+
# np.random.seed()
128+
#
129+
# return out
130+
131+
106132
def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,
107133
resamples=5000, random_seed=12345):
108134
"""Bootstraps the effect_size for 2 groups."""
135+
109136
from . import effsize as __es
110137
import numpy as np
111138

@@ -114,11 +141,20 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,
114141
out = np.repeat(np.nan, resamples)
115142
x0_len = len(x0)
116143
x1_len = len(x1)
117-
144+
118145
for i in range(int(resamples)):
119-
x0_boot = np.random.choice(x0, x0_len, replace=True)
120-
x1_boot = np.random.choice(x1, x1_len, replace=True)
121-
out[i] = __es.two_group_difference(x0_boot, x1_boot,
146+
147+
if is_paired:
148+
if x0_len != x1_len:
149+
raise ValueError("The two arrays do not have the same length.")
150+
random_idx = np.random.choice(x0_len, x0_len, replace=True)
151+
x0_sample = x0[random_idx]
152+
x1_sample = x1[random_idx]
153+
else:
154+
x0_sample = np.random.choice(x0, x0_len, replace=True)
155+
x1_sample = np.random.choice(x1, x1_len, replace=True)
156+
157+
out[i] = __es.two_group_difference(x0_sample, x1_sample,
122158
is_paired, effect_size)
123159

124160
# reset seed
@@ -128,6 +164,7 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,
128164

129165

130166

167+
131168
def compute_meandiff_bias_correction(bootstraps, effsize):
132169
"""
133170
Computes the bias correction required for the BCa method

0 commit comments

Comments
 (0)