Skip to content

Commit 9bfed8a

Browse files
Jacobluke-JAnns98
authored andcommitted
Delta-delta calculation for Proportional
1 parent f0730ab commit 9bfed8a

2 files changed

Lines changed: 51 additions & 42 deletions

File tree

dabest/_effsize_objects.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ def __pre_calc(self):
884884
self.__is_paired,
885885
self.__resamples,
886886
self.__random_seed,
887+
self.__proportional,
887888
)
888889

889890
for j, current_tuple in enumerate(idx):

dabest/_stats_tools/confint_2group_diff.py

Lines changed: 50 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -159,24 +159,28 @@ def compute_bootstrapped_diff(
159159

160160
return out
161161

162-
@njit(cache=True) # parallelization must be turned off for random number generation
163-
def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired):
162+
163+
@njit(cache=True)
164+
def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired, proportional=False):
165+
"""
166+
Compute bootstrapped differences for delta-delta, handling both regular and proportional data
167+
"""
164168
np.random.seed(rng_seed)
165-
out_delta_g = np.empty(resamples)
166169
deltadelta = np.empty(resamples)
167170

168-
n1, n2, n3, n4 = len(x1), len(x2), len(x3), len(x4)
169-
if is_paired:
170-
if n1 != n2 or n3 != n4:
171-
raise ValueError("Each control group must have the same length as its corresponding test group in paired analysis.")
171+
# For proportional data, we don't need out_delta_g
172+
out_delta_g = np.empty(resamples) if not proportional else deltadelta # Use deltadelta as dummy
172173

174+
n1, n2, n3, n4 = len(x1), len(x2), len(x3), len(x4)
175+
if is_paired and (n1 != n2 or n3 != n4):
176+
raise ValueError("Each control group must have the same length as its corresponding test group in paired analysis.")
173177

174178
# Bootstrapping
175179
for i in range(resamples):
176180
# Paired or unpaired resampling
177181
if is_paired:
178-
indices_1 = np.random.choice(len(x1),len(x1))
179-
indices_2 = np.random.choice(len(x3),len(x3))
182+
indices_1 = np.random.choice(len(x1), len(x1))
183+
indices_2 = np.random.choice(len(x3), len(x3))
180184
x1_sample, x2_sample = x1[indices_1], x2[indices_1]
181185
x3_sample, x4_sample = x3[indices_2], x4[indices_2]
182186
else:
@@ -187,13 +191,14 @@ def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_pai
187191
x1_sample, x2_sample = x1[indices_1], x2[indices_2]
188192
x3_sample, x4_sample = x3[indices_3], x4[indices_4]
189193

190-
# Calculating deltas
194+
# Calculate deltas
191195
delta_1 = np.mean(x2_sample) - np.mean(x1_sample)
192196
delta_2 = np.mean(x4_sample) - np.mean(x3_sample)
193197
delta_delta = delta_2 - delta_1
194-
198+
195199
deltadelta[i] = delta_delta
196-
out_delta_g[i] = delta_delta / pooled_sd
200+
if not proportional:
201+
out_delta_g[i] = delta_delta / pooled_sd
197202

198203
return out_delta_g, deltadelta
199204

@@ -204,39 +209,42 @@ def compute_delta2_bootstrapped_diff(
204209
x3: np.ndarray, # Control group 2
205210
x4: np.ndarray, # Test group 2
206211
is_paired: str = None,
207-
resamples: int = 5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.
208-
random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.
209-
) -> (
210-
tuple
211-
): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta
212+
resamples: int = 5000,
213+
random_seed: int = 12345,
214+
proportional: bool = False
215+
) -> tuple:
212216
"""
213-
Bootstraps the effect size deltas' g.
214-
217+
Bootstraps the effect size deltas' g or proportional delta-delta
215218
"""
216-
217219
x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4])
218-
219-
# Calculating pooled sample standard deviation
220-
stds = [np.std(x) for x in [x1, x2, x3, x4]]
221-
ns = [len(x) for x in [x1, x2, x3, x4]]
222-
223-
sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))
224-
sd_denominator = sum(n - 1 for n in ns)
225-
226-
# Avoid division by zero
227-
if sd_denominator == 0:
228-
raise ValueError("Insufficient data to compute pooled standard deviation.")
229-
230-
pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)
231-
232-
# Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later)
233-
if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:
234-
raise ValueError("Pooled sample standard deviation is NaN or zero.")
235-
236-
out_delta_g, deltadelta = delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired)
237-
238-
# Empirical delta_g calculation
239-
delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd
220+
221+
if proportional:
222+
# For proportional data, pass 1.0 as dummy pooled_sd (won't be used)
223+
out_delta_g, deltadelta = delta2_bootstrap_loop(
224+
x1, x2, x3, x4, resamples, 1.0, random_seed, is_paired, proportional=True
225+
)
226+
# For proportional data, delta_g is the empirical delta-delta
227+
delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1)))
228+
else:
229+
# Calculate pooled sample standard deviation for non-proportional data
230+
stds = [np.std(x) for x in [x1, x2, x3, x4]]
231+
ns = [len(x) for x in [x1, x2, x3, x4]]
232+
233+
sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))
234+
sd_denominator = sum(n - 1 for n in ns)
235+
236+
if sd_denominator == 0:
237+
raise ValueError("Insufficient data to compute pooled standard deviation.")
238+
239+
pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)
240+
241+
if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:
242+
raise ValueError("Pooled sample standard deviation is NaN or zero.")
243+
244+
out_delta_g, deltadelta = delta2_bootstrap_loop(
245+
x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired, proportional=False
246+
)
247+
delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd
240248

241249
return out_delta_g, delta_g, deltadelta
242250

0 commit comments

Comments
 (0)