99import pandas as pd
1010import lqrt
1111from scipy .stats import norm
12+ import numpy as np
1213from numpy import array , isnan , isinf , repeat , random , isin , abs , var
1314from numpy import sort as npsort
1415from numpy import nan as npnan
@@ -357,12 +358,17 @@ def _perform_statistical_test(self):
357358 # References:
358359 # https://en.wikipedia.org/wiki/McNemar%27s_test
359360
360- df_temp = pd .DataFrame ({"control" : self .__control , "test" : self .__test })
361- x1 = len (df_temp [(df_temp ["control" ] == 0 ) & (df_temp ["test" ] == 0 )])
362- x2 = len (df_temp [(df_temp ["control" ] == 0 ) & (df_temp ["test" ] == 1 )])
363- x3 = len (df_temp [(df_temp ["control" ] == 1 ) & (df_temp ["test" ] == 0 )])
364- x4 = len (df_temp [(df_temp ["control" ] == 1 ) & (df_temp ["test" ] == 1 )])
365- table = [[x1 , x2 ], [x3 , x4 ]]
361+ # df_temp = pd.DataFrame({"control": self.__control, "test": self.__test})
362+ # x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)])
363+ # x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)])
364+ # x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)])
365+ # x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)])
366+ # table = [[x1, x2], [x3, x4]]
367+ x1 = np .sum ((self .__control == 0 ) & (self .__test == 0 ))
368+ x2 = np .sum ((self .__control == 0 ) & (self .__test == 1 ))
369+ x3 = np .sum ((self .__control == 1 ) & (self .__test == 0 ))
370+ x4 = np .sum ((self .__control == 1 ) & (self .__test == 1 ))
371+ table = np .array ([[x1 , x2 ], [x3 , x4 ]])
366372 _mcnemar = mcnemar (table , exact = True , correction = True )
367373 self .__pvalue_mcnemar = _mcnemar .pvalue
368374 self .__statistic_mcnemar = _mcnemar .statistic
@@ -861,18 +867,19 @@ def __pre_calc(self):
861867 out = []
862868 reprs = []
863869
870+ grouped_data = {name : group [yvar ].copy () for name , group in dat .groupby (xvar , observed = False )}
864871 if self .__delta2 :
865872 mixed_data = []
866873 for j , current_tuple in enumerate (idx ):
867874 if self .__is_paired != "sequential" :
868875 cname = current_tuple [0 ]
869- control = dat [ dat [ xvar ] == cname ][ yvar ]. copy ()
876+ control = grouped_data [ cname ]
870877
871878 for ix , tname in enumerate (current_tuple [1 :]):
872879 if self .__is_paired == "sequential" :
873880 cname = current_tuple [ix ]
874- control = dat [ dat [ xvar ] == cname ][ yvar ]. copy ()
875- test = dat [ dat [ xvar ] == tname ][ yvar ]. copy ()
881+ control = grouped_data [ cname ]
882+ test = grouped_data [ tname ]
876883 mixed_data .append (control )
877884 mixed_data .append (test )
878885 bootstraps_delta_delta = ci2g .compute_delta2_bootstrapped_diff (
@@ -888,13 +895,13 @@ def __pre_calc(self):
888895 for j , current_tuple in enumerate (idx ):
889896 if self .__is_paired != "sequential" :
890897 cname = current_tuple [0 ]
891- control = dat [ dat [ xvar ] == cname ][ yvar ]. copy ()
898+ control = grouped_data [ cname ]
892899
893900 for ix , tname in enumerate (current_tuple [1 :]):
894901 if self .__is_paired == "sequential" :
895902 cname = current_tuple [ix ]
896- control = dat [ dat [ xvar ] == cname ][ yvar ]. copy ()
897- test = dat [ dat [ xvar ] == tname ][ yvar ]. copy ()
903+ control = grouped_data [ cname ]
904+ test = grouped_data [ tname ]
898905
899906 result = TwoGroupsEffectSize (
900907 control ,
@@ -1055,16 +1062,18 @@ def __calc_lqrt(self):
10551062
10561063 out = []
10571064
1065+ grouped_data = {name :group [yvar ].copy () for name , group in dat .groupby (xvar )}
1066+
10581067 for j , current_tuple in enumerate (db_obj .idx ):
10591068 if self .__is_paired != "sequential" :
10601069 cname = current_tuple [0 ]
1061- control = dat [ dat [ xvar ] == cname ][ yvar ]. copy ()
1070+ control = grouped_data [ cname ]
10621071
10631072 for ix , tname in enumerate (current_tuple [1 :]):
10641073 if self .__is_paired == "sequential" :
10651074 cname = current_tuple [ix ]
1066- control = dat [ dat [ xvar ] == cname ][ yvar ]. copy ()
1067- test = dat [ dat [ xvar ] == tname ][ yvar ]. copy ()
1075+ control = grouped_data [ cname ]
1076+ test = grouped_data [ tname ]
10681077
10691078 if self .__is_paired :
10701079 # Refactored here in v0.3.0 for performance issues.
0 commit comments