|
212 | 212 | " \n", |
213 | 213 | " return out\n", |
214 | 214 | "\n", |
def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1
                                     x2:np.ndarray,# Test group 1
                                     x3:np.ndarray,# Control group 2
                                     x4:np.ndarray,# Test group 2
                                     is_paired:str=None,
                                     resamples:int=5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.
                                     random_seed:int=12345# `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.
                                    )->tuple: # bootstrapped result and empirical result of deltas' g, and the bootstrapped result of delta-delta
    """
    Bootstraps the effect size deltas' g.

    The delta-delta is ``(mean(x4) - mean(x3)) - (mean(x2) - mean(x1))``;
    deltas' g is that quantity divided by a standard deviation pooled over
    all four groups. The pooled standard deviation is computed once from the
    observed data and reused as the standardiser for every resample.

    Resampling draws row indices with replacement from the *stacked* data
    (all four groups when unpaired; the two control groups, with the same
    indices applied to the two test groups, when paired) and then splits the
    drawn rows back into groups. The per-group size therefore varies from
    resample to resample, and a group that receives no rows yields a NaN
    entry for that resample.

    Parameters
    ----------
    x1, x2, x3, x4 :
        Control group 1, test group 1, control group 2 and test group 2.
    is_paired :
        Any truthy value selects paired resampling, which requires
        ``len(x1) == len(x2)`` and ``len(x3) == len(x4)``.
    resamples :
        Number of bootstrap resamples.
    random_seed :
        Seed for the PCG64-backed ``RandomState`` used to draw the indices.

    Returns
    -------
    tuple
        ``(out_delta_g, delta_g, deltadelta)``: the array of bootstrapped
        deltas' g, the deltas' g of the observed data, and the array of
        bootstrapped (unstandardised) delta-delta values.

    Raises
    ------
    ValueError
        If ``is_paired`` is truthy and the paired groups differ in length.
    """

    import numpy as np
    import pandas as pd
    from numpy.random import PCG64, RandomState

    x1_len, x2_len, x3_len, x4_len = len(x1), len(x2), len(x3), len(x4)

    # Validate once, before any work is done, rather than on every resample.
    if is_paired and ((x1_len != x2_len) or (x3_len != x4_len)):
        raise ValueError("The two arrays do not have the same length.")

    rng = RandomState(PCG64(random_seed))
    n_resamples = int(resamples)
    out_delta_g = np.repeat(np.nan, n_resamples)
    deltadelta = np.repeat(np.nan, n_resamples)

    # Pooled standard deviation across the four groups.
    # NOTE(review): np.std defaults to ddof=0 while the weights below are
    # (n - 1); confirm whether ddof=1 was intended. Left unchanged so that
    # results stay identical to the previous implementation.
    n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len
    s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4)

    sd_numerator = ((n_a2_b1 - 1) * s_a2_b1 ** 2 + (n_a1_b1 - 1) * s_a1_b1 ** 2
                    + (n_a2_b2 - 1) * s_a2_b2 ** 2 + (n_a1_b2 - 1) * s_a1_b2 ** 2)
    sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1)
    pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)

    # The stacked frames do not depend on the resample, so build them once.
    if is_paired:
        control_frame = pd.DataFrame({
            'value': np.concatenate([x1, x3]),
            'array_id': np.repeat(['x1', 'x3'], [x1_len, x3_len])
        })
        test_frame = pd.DataFrame({
            'value': np.concatenate([x2, x4]),
            'array_id': np.repeat(['x2', 'x4'], [x2_len, x4_len])
        })
    else:
        control_frame = pd.DataFrame({
            'value': np.concatenate([x1, x2, x3, x4]),
            'array_id': np.repeat(['x1', 'x2', 'x3', 'x4'],
                                  [x1_len, x2_len, x3_len, x4_len])
        })
        # Unpaired: all four groups are drawn from the same stacked frame.
        test_frame = control_frame

    n_rows = len(control_frame)
    shared_frame = test_frame is control_frame

    for i in range(n_resamples):
        sample_index = rng.choice(n_rows, n_rows, replace=True)
        control_sample = control_frame.loc[sample_index]
        # Paired: reuse the same indices so each control row keeps its partner.
        test_sample = control_sample if shared_frame else test_frame.loc[sample_index]

        x1_sample = control_sample[control_sample['array_id'] == 'x1']['value']
        x3_sample = control_sample[control_sample['array_id'] == 'x3']['value']
        x2_sample = test_sample[test_sample['array_id'] == 'x2']['value']
        x4_sample = test_sample[test_sample['array_id'] == 'x4']['value']

        delta_1 = np.mean(x2_sample) - np.mean(x1_sample)
        delta_2 = np.mean(x4_sample) - np.mean(x3_sample)
        delta_delta = delta_2 - delta_1
        deltadelta[i] = delta_delta
        out_delta_g[i] = delta_delta / pooled_sample_sd

    delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd
    return out_delta_g, delta_g, deltadelta
215 | 286 | "\n", |
216 | 287 | "\n", |
217 | 288 | "\n", |
|
0 commit comments