Skip to content

Commit 5b890c1

Browse files
committed
add a function first and try pushing
1 parent 9a97e72 commit 5b890c1

4 files changed

Lines changed: 148 additions & 4 deletions

File tree

dabest/_modidx.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
'dabest/_stats_tools/confint_2group_diff.py'),
3232
'dabest._stats_tools.confint_2group_diff.compute_bootstrapped_diff': ( 'API/confint_2group_diff.html#compute_bootstrapped_diff',
3333
'dabest/_stats_tools/confint_2group_diff.py'),
34+
'dabest._stats_tools.confint_2group_diff.compute_delta2_bootstrapped_diff': ( 'API/confint_2group_diff.html#compute_delta2_bootstrapped_diff',
35+
'dabest/_stats_tools/confint_2group_diff.py'),
3436
'dabest._stats_tools.confint_2group_diff.compute_interval_limits': ( 'API/confint_2group_diff.html#compute_interval_limits',
3537
'dabest/_stats_tools/confint_2group_diff.py'),
3638
'dabest._stats_tools.confint_2group_diff.compute_meandiff_bias_correction': ( 'API/confint_2group_diff.html#compute_meandiff_bias_correction',

dabest/_stats_tools/confint_2group_diff.py

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
# %% auto 0
44
__all__ = ['create_jackknife_indexes', 'create_repeated_indexes', 'compute_meandiff_jackknife', 'compute_bootstrapped_diff',
5-
'compute_meandiff_bias_correction', 'compute_interval_limits', 'calculate_group_var',
6-
'calculate_weighted_delta']
5+
'compute_delta2_bootstrapped_diff', 'compute_meandiff_bias_correction', 'compute_interval_limits',
6+
'calculate_group_var', 'calculate_weighted_delta']
77

88
# %% ../../nbs/API/confint_2group_diff.ipynb 4
99
import numpy as np
@@ -155,6 +155,77 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,
155155

156156
return out
157157

158+
def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1
                                     x2:np.ndarray,# Test group 1
                                     x3:np.ndarray,# Control group 2
                                     x4:np.ndarray,# Test group 2
                                     is_paired:str=None, # If truthy, (x1, x2) and (x3, x4) are resampled as pairs.
                                     resamples:int=5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.
                                     random_seed:int=12345# `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.
                                     )->tuple: # bootstrapped result and empirical result of deltas' g, and the bootstrapped result of delta-delta
    """
    Bootstraps the effect size deltas' g.

    Returns
    -------
    tuple
        ``(out_delta_g, delta_g, deltadelta)`` where ``out_delta_g`` is the
        array of bootstrapped standardized delta-delta values (length
        ``resamples``), ``delta_g`` is the empirical (non-bootstrapped)
        standardized delta-delta, and ``deltadelta`` is the array of raw
        bootstrapped delta-delta values.

    Raises
    ------
    ValueError
        If ``is_paired`` is truthy and the paired groups differ in length.
    """

    import numpy as np
    import pandas as pd
    from numpy.random import PCG64, RandomState

    rng = RandomState(PCG64(random_seed))
    x1_len = len(x1)
    x2_len = len(x2)
    x3_len = len(x3)
    x4_len = len(x4)
    out_delta_g = np.repeat(np.nan, resamples)
    deltadelta = np.repeat(np.nan, resamples)

    # Pooled SD across all four groups, weighted by (n - 1); used to
    # standardize the raw delta-delta into deltas' g.
    # NOTE(review): np.std defaults to ddof=0 (population SD) while the
    # weights use n-1 — kept as-is to preserve published numerics.
    n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len
    s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4)

    sd_numerator = ((n_a2_b1 - 1) * s_a2_b1 ** 2 + (n_a1_b1 - 1) * s_a1_b1 ** 2 + (n_a2_b2 - 1) * s_a2_b2 ** 2 + (
            n_a1_b2 - 1) * s_a1_b2 ** 2)
    sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1)
    pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)

    # FIX: the length validation and the resampling DataFrames are invariant
    # across iterations — the original rebuilt them inside the bootstrap loop
    # on every resample. Hoisting them out is a pure performance fix: no rng
    # draws happen here, so the resampled values are bit-identical.
    if is_paired:
        if (x1_len != x2_len) or (x3_len != x4_len):
            raise ValueError("The two arrays do not have the same length.")
        df_paired_1 = pd.DataFrame({
            'value': np.concatenate([x1, x3]),
            'array_id': np.repeat(['x1', 'x3'], [x1_len, x3_len])
        })
        # x2/x4 lengths equal x1/x3 lengths (validated above), which keeps the
        # two frames row-aligned so a single index array resamples the pairs.
        df_paired_2 = pd.DataFrame({
            'value': np.concatenate([x2, x4]),
            'array_id': np.repeat(['x2', 'x4'], [x1_len, x3_len])
        })
    else:
        df = pd.DataFrame({
            'value': np.concatenate([x1, x2, x3, x4]),
            'array_id': np.repeat(['x1', 'x2', 'x3', 'x4'], [x1_len, x2_len, x3_len, x4_len])
        })

    for i in range(int(resamples)):
        if is_paired:
            # Draw one set of row indices and apply it to BOTH frames, so each
            # resampled observation keeps its (control, test) partner.
            x_sample_index = rng.choice(len(df_paired_1), len(df_paired_1), replace=True)
            x_sample_1 = df_paired_1.loc[x_sample_index]
            x_sample_2 = df_paired_2.loc[x_sample_index]
            x1_sample = x_sample_1[x_sample_1['array_id'] == 'x1']['value']
            x2_sample = x_sample_2[x_sample_2['array_id'] == 'x2']['value']
            x3_sample = x_sample_1[x_sample_1['array_id'] == 'x3']['value']
            x4_sample = x_sample_2[x_sample_2['array_id'] == 'x4']['value']
        else:
            # Unpaired: resample freely from the pooled observations.
            x_sample_index = rng.choice(len(df), len(df), replace=True)
            x_sample = df.loc[x_sample_index]
            x1_sample = x_sample[x_sample['array_id'] == 'x1']['value']
            x2_sample = x_sample[x_sample['array_id'] == 'x2']['value']
            x3_sample = x_sample[x_sample['array_id'] == 'x3']['value']
            x4_sample = x_sample[x_sample['array_id'] == 'x4']['value']

        delta_1 = np.mean(x2_sample) - np.mean(x1_sample)
        delta_2 = np.mean(x4_sample) - np.mean(x3_sample)
        delta_delta = delta_2 - delta_1
        deltadelta[i] = delta_delta
        out_delta_g[i] = delta_delta / pooled_sample_sd

    # Empirical (non-bootstrapped) standardized delta-delta.
    delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd
    return out_delta_g, delta_g, deltadelta
158229

159230

160231

nbs/API/class.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4005,9 +4005,9 @@
40054005
],
40064006
"metadata": {
40074007
"kernelspec": {
4008-
"display_name": "conda-env-dabest-nbdev-py",
4008+
"display_name": "python3",
40094009
"language": "python",
4010-
"name": "conda-env-dabest-nbdev-py"
4010+
"name": "python3"
40114011
}
40124012
},
40134013
"nbformat": 4,

nbs/API/confint_2group_diff.ipynb

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,77 @@
212212
" \n",
213213
" return out\n",
214214
"\n",
215+
"def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1\n",
216+
" x2:np.ndarray,# Test group 1\n",
217+
" x3:np.ndarray,# Control group 2\n",
218+
" x4:np.ndarray,# Test group 2\n",
219+
" is_paired:str=None,\n",
220+
" resamples:int=5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.\n",
221+
" random_seed:int=12345# `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n",
222+
" )->tuple: # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta\n",
223+
" \"\"\"\n",
224+
" Bootstraps the effect size deltas' g.\n",
225+
" \n",
226+
" \"\"\"\n",
227+
"\n",
228+
" import numpy as np\n",
229+
" import pandas as pd\n",
230+
" from numpy.random import PCG64, RandomState\n",
231+
"\n",
232+
" rng = RandomState(PCG64(random_seed))\n",
233+
" x1_len = len(x1)\n",
234+
" x2_len = len(x2)\n",
235+
" x3_len = len(x3)\n",
236+
" x4_len = len(x4)\n",
237+
" out_delta_g = np.repeat(np.nan, resamples)\n",
238+
" deltadelta = np.repeat(np.nan, resamples)\n",
239+
"\n",
240+
" n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2= x1_len, x2_len, x3_len, x4_len\n",
241+
" s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4)\n",
242+
"\n",
243+
" sd_numerator = ((n_a2_b1 - 1) * s_a2_b1 ** 2 + (n_a1_b1 - 1) * s_a1_b1 ** 2 + (n_a2_b2 - 1) * s_a2_b2 ** 2 + (\n",
244+
" n_a1_b2 - 1) * s_a1_b2 ** 2)\n",
245+
" sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1)\n",
246+
" pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n",
247+
"\n",
248+
" for i in range(int(resamples)):\n",
249+
" if is_paired:\n",
250+
" if (x1_len != x2_len) or (x3_len != x4_len):\n",
251+
" raise ValueError(\"The two arrays do not have the same length.\")\n",
252+
" df_paired_1 = pd.DataFrame({\n",
253+
" 'value': np.concatenate([x1, x3]),\n",
254+
" 'array_id': np.repeat(['x1','x3'], [x1_len, x3_len])\n",
255+
" })\n",
256+
" df_paired_2 = pd.DataFrame({\n",
257+
" 'value': np.concatenate([x2, x4]),\n",
258+
" 'array_id': np.repeat(['x2','x4'], [x1_len, x3_len])\n",
259+
" })\n",
260+
" x_sample_index = rng.choice(len(df_paired_1), len(df_paired_1), replace=True)\n",
261+
" x_sample_1 = df_paired_1.loc[x_sample_index]\n",
262+
" x_sample_2 = df_paired_2.loc[x_sample_index]\n",
263+
" x1_sample = x_sample_1[x_sample_1['array_id'] == 'x1']['value']\n",
264+
" x2_sample = x_sample_2[x_sample_2['array_id'] == 'x2']['value']\n",
265+
" x3_sample = x_sample_1[x_sample_1['array_id'] == 'x3']['value']\n",
266+
" x4_sample = x_sample_2[x_sample_2['array_id'] == 'x4']['value']\n",
267+
" else:\n",
268+
" df = pd.DataFrame({\n",
269+
" 'value': np.concatenate([x1, x2, x3, x4]),\n",
270+
" 'array_id': np.repeat(['x1', 'x2', 'x3', 'x4'], [x1_len, x2_len, x3_len, x4_len])\n",
271+
" })\n",
272+
" x_sample_index = rng.choice(len(df),len(df), replace=True)\n",
273+
" x_sample = df.loc[x_sample_index]\n",
274+
" x1_sample = x_sample[x_sample['array_id'] == 'x1']['value']\n",
275+
" x2_sample = x_sample[x_sample['array_id'] == 'x2']['value']\n",
276+
" x3_sample = x_sample[x_sample['array_id'] == 'x3']['value']\n",
277+
" x4_sample = x_sample[x_sample['array_id'] == 'x4']['value']\n",
278+
"\n",
279+
" delta_1 = np.mean(x2_sample)-np.mean(x1_sample)\n",
280+
" delta_2 = np.mean(x4_sample)-np.mean(x3_sample)\n",
281+
" delta_delta = delta_2 - delta_1\n",
282+
" deltadelta[i] = delta_delta\n",
283+
" out_delta_g[i] = delta_delta/pooled_sample_sd\n",
284+
" delta_g = ((np.mean(x4)-np.mean(x3)) - (np.mean(x2)-np.mean(x1))) / pooled_sample_sd\n",
285+
" return out_delta_g, delta_g, deltadelta\n",
215286
"\n",
216287
"\n",
217288
"\n",

0 commit comments

Comments
 (0)