@@ -159,24 +159,28 @@ def compute_bootstrapped_diff(
159159
160160 return out
161161
162- @njit (cache = True ) # parallelization must be turned off for random number generation
163- def delta2_bootstrap_loop (x1 , x2 , x3 , x4 , resamples , pooled_sd , rng_seed , is_paired ):
162+
163+ @njit (cache = True )
164+ def delta2_bootstrap_loop (x1 , x2 , x3 , x4 , resamples , pooled_sd , rng_seed , is_paired , proportional = False ):
165+ """
166+ Compute bootstrapped differences for delta-delta, handling both regular and proportional data
167+ """
164168 np .random .seed (rng_seed )
165- out_delta_g = np .empty (resamples )
166169 deltadelta = np .empty (resamples )
167170
168- n1 , n2 , n3 , n4 = len (x1 ), len (x2 ), len (x3 ), len (x4 )
169- if is_paired :
170- if n1 != n2 or n3 != n4 :
171- raise ValueError ("Each control group must have the same length as its corresponding test group in paired analysis." )
171+ # For proportional data, we don't need out_delta_g
172+ out_delta_g = np .empty (resamples ) if not proportional else deltadelta # Use deltadelta as dummy
172173
174+ n1 , n2 , n3 , n4 = len (x1 ), len (x2 ), len (x3 ), len (x4 )
175+ if is_paired and (n1 != n2 or n3 != n4 ):
176+ raise ValueError ("Each control group must have the same length as its corresponding test group in paired analysis." )
173177
174178 # Bootstrapping
175179 for i in range (resamples ):
176180 # Paired or unpaired resampling
177181 if is_paired :
178- indices_1 = np .random .choice (len (x1 ),len (x1 ))
179- indices_2 = np .random .choice (len (x3 ),len (x3 ))
182+ indices_1 = np .random .choice (len (x1 ), len (x1 ))
183+ indices_2 = np .random .choice (len (x3 ), len (x3 ))
180184 x1_sample , x2_sample = x1 [indices_1 ], x2 [indices_1 ]
181185 x3_sample , x4_sample = x3 [indices_2 ], x4 [indices_2 ]
182186 else :
@@ -187,13 +191,14 @@ def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_pai
187191 x1_sample , x2_sample = x1 [indices_1 ], x2 [indices_2 ]
188192 x3_sample , x4_sample = x3 [indices_3 ], x4 [indices_4 ]
189193
190- # Calculating deltas
194+ # Calculate deltas
191195 delta_1 = np .mean (x2_sample ) - np .mean (x1_sample )
192196 delta_2 = np .mean (x4_sample ) - np .mean (x3_sample )
193197 delta_delta = delta_2 - delta_1
194-
198+
195199 deltadelta [i ] = delta_delta
196- out_delta_g [i ] = delta_delta / pooled_sd
200+ if not proportional :
201+ out_delta_g [i ] = delta_delta / pooled_sd
197202
198203 return out_delta_g , deltadelta
199204
@@ -204,39 +209,42 @@ def compute_delta2_bootstrapped_diff(
204209 x3 : np .ndarray , # Control group 2
205210 x4 : np .ndarray , # Test group 2
206211 is_paired : str = None ,
207- resamples : int = 5000 , # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.
208- random_seed : int = 12345 , # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.
209- ) -> (
210- tuple
211- ): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta
212+ resamples : int = 5000 ,
213+ random_seed : int = 12345 ,
214+ proportional : bool = False
215+ ) -> tuple :
212216 """
213- Bootstraps the effect size deltas' g.
214-
217+ Bootstraps the effect size deltas' g or proportional delta-delta
215218 """
216-
217219 x1 , x2 , x3 , x4 = map (np .asarray , [x1 , x2 , x3 , x4 ])
218-
219- # Calculating pooled sample standard deviation
220- stds = [np .std (x ) for x in [x1 , x2 , x3 , x4 ]]
221- ns = [len (x ) for x in [x1 , x2 , x3 , x4 ]]
222-
223- sd_numerator = sum ((n - 1 ) * s ** 2 for n , s in zip (ns , stds ))
224- sd_denominator = sum (n - 1 for n in ns )
225-
226- # Avoid division by zero
227- if sd_denominator == 0 :
228- raise ValueError ("Insufficient data to compute pooled standard deviation." )
229-
230- pooled_sample_sd = np .sqrt (sd_numerator / sd_denominator )
231-
232- # Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later)
233- if np .isnan (pooled_sample_sd ) or pooled_sample_sd == 0 :
234- raise ValueError ("Pooled sample standard deviation is NaN or zero." )
235-
236- out_delta_g , deltadelta = delta2_bootstrap_loop (x1 , x2 , x3 , x4 , resamples , pooled_sample_sd , random_seed , is_paired )
237-
238- # Empirical delta_g calculation
239- delta_g = ((np .mean (x4 ) - np .mean (x3 )) - (np .mean (x2 ) - np .mean (x1 ))) / pooled_sample_sd
220+
221+ if proportional :
222+ # For proportional data, pass 1.0 as dummy pooled_sd (won't be used)
223+ out_delta_g , deltadelta = delta2_bootstrap_loop (
224+ x1 , x2 , x3 , x4 , resamples , 1.0 , random_seed , is_paired , proportional = True
225+ )
226+ # For proportional data, delta_g is the empirical delta-delta
227+ delta_g = ((np .mean (x4 ) - np .mean (x3 )) - (np .mean (x2 ) - np .mean (x1 )))
228+ else :
229+ # Calculate pooled sample standard deviation for non-proportional data
230+ stds = [np .std (x ) for x in [x1 , x2 , x3 , x4 ]]
231+ ns = [len (x ) for x in [x1 , x2 , x3 , x4 ]]
232+
233+ sd_numerator = sum ((n - 1 ) * s ** 2 for n , s in zip (ns , stds ))
234+ sd_denominator = sum (n - 1 for n in ns )
235+
236+ if sd_denominator == 0 :
237+ raise ValueError ("Insufficient data to compute pooled standard deviation." )
238+
239+ pooled_sample_sd = np .sqrt (sd_numerator / sd_denominator )
240+
241+ if np .isnan (pooled_sample_sd ) or pooled_sample_sd == 0 :
242+ raise ValueError ("Pooled sample standard deviation is NaN or zero." )
243+
244+ out_delta_g , deltadelta = delta2_bootstrap_loop (
245+ x1 , x2 , x3 , x4 , resamples , pooled_sample_sd , random_seed , is_paired , proportional = False
246+ )
247+ delta_g = ((np .mean (x4 ) - np .mean (x3 )) - (np .mean (x2 ) - np .mean (x1 ))) / pooled_sample_sd
240248
241249 return out_delta_g , delta_g , deltadelta
242250
0 commit comments