|
75 | 75 | "outputs": [], |
76 | 76 | "source": [ |
77 | 77 | "# Dropped to 30 reps to save time. v0.2.5.\n", |
78 | | - "reps = 30\n", |
79 | | - "ci = 95\n", |
| 78 | + "reps=30\n", |
| 79 | + "ci=95\n", |
80 | 80 | "POPULATION_N = 10000\n", |
81 | 81 | "SAMPLE_N = 10\n", |
82 | 82 | "\n", |
83 | 83 | "# Create data for hedges g and cohens d.\n", |
84 | 84 | "CONTROL_MEAN = np.random.randint(1, 1000)\n", |
85 | | - "POP_SD = np.random.randint(1, 15)\n", |
86 | | - "POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n", |
| 85 | + "POP_SD = np.random.randint(1, 15)\n", |
| 86 | + "POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)\n", |
87 | 87 | "\n", |
88 | 88 | "TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)\n", |
89 | 89 | "norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)\n", |
90 | 90 | "c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n", |
91 | | - "t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_STD_DIFFERENCE, **norm_sample_kwargs)\n", |
| 91 | + "t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs)\n", |
| 92 | + "\n", |
| 93 | + "std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n", |
92 | 94 | "\n", |
93 | | - "std_diff_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n", |
94 | 95 | "\n", |
95 | 96 | "\n", |
96 | 97 | "# Create mean_diff data\n", |
97 | 98 | "CONTROL_MEAN = np.random.randint(1, 1000)\n", |
98 | | - "POP_SD = np.random.randint(1, 15)\n", |
99 | | - "TRUE_DIFFERENCE = np.random.randint(-POP_SD * 5, POP_SD * 5)\n", |
| 99 | + "POP_SD = np.random.randint(1, 15)\n", |
| 100 | + "TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)\n", |
100 | 101 | "\n", |
101 | 102 | "c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)\n", |
102 | | - "t1 = norm.rvs(loc=CONTROL_MEAN + TRUE_DIFFERENCE, **norm_sample_kwargs)\n", |
| 103 | + "t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)\n", |
| 104 | + "\n", |
| 105 | + "mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})\n", |
103 | 106 | "\n", |
104 | | - "mean_df = pd.DataFrame({\"Control\": c1, \"Test\": t1})\n", |
105 | 107 | "\n", |
106 | 108 | "\n", |
107 | 109 | "# Create median_diff data\n", |
|
110 | 112 | "\n", |
111 | 113 | "skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)\n", |
112 | 114 | "skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)\n", |
113 | | - "skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100 + MEDIAN_DIFFERENCE)\n", |
| 115 | + "skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)\n", |
114 | 116 | "\n", |
115 | 117 | "sample_kwargs = dict(replace=False, size=SAMPLE_N)\n", |
116 | 118 | "skewsample1 = np.random.choice(skewpop1, **sample_kwargs)\n", |
117 | 119 | "skewsample2 = np.random.choice(skewpop2, **sample_kwargs)\n", |
118 | 120 | "\n", |
119 | | - "median_df = pd.DataFrame({\"Control\": skewsample1, \"Test\": skewsample2})\n", |
| 121 | + "median_df = pd.DataFrame({'Control' : skewsample1, 'Test': skewsample2})\n", |
| 122 | + "\n", |
120 | 123 | "\n", |
121 | 124 | "\n", |
122 | 125 | "# Create two populations with a 50% overlap.\n", |
|
125 | 128 | "\n", |
126 | 129 | "pop_kwargs = dict(scale=SD, size=POPULATION_N)\n", |
127 | 130 | "pop1 = norm.rvs(loc=100, **pop_kwargs)\n", |
128 | | - "pop2 = norm.rvs(loc=100 + CD_DIFFERENCE, **pop_kwargs)\n", |
| 131 | + "pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)\n", |
129 | 132 | "\n", |
130 | 133 | "sample_kwargs = dict(replace=False, size=SAMPLE_N)\n", |
131 | 134 | "sample1 = np.random.choice(pop1, **sample_kwargs)\n", |
132 | 135 | "sample2 = np.random.choice(pop2, **sample_kwargs)\n", |
133 | 136 | "\n", |
134 | | - "cd_df = pd.DataFrame({\"Control\": sample1, \"Test\": sample2})\n", |
| 137 | + "cd_df = pd.DataFrame({'Control' : sample1, 'Test': sample2})\n", |
| 138 | + "\n", |
135 | 139 | "\n", |
136 | 140 | "\n", |
137 | 141 | "# Create several CIs and see if the true population difference lies within.\n", |
138 | | - "error_count_cohens_d = 0\n", |
139 | | - "error_count_hedges_g = 0\n", |
140 | | - "error_count_mean_diff = 0\n", |
141 | | - "error_count_median_diff = 0\n", |
| 142 | + "error_count_cohens_d = 0\n", |
| 143 | + "error_count_hedges_g = 0\n", |
| 144 | + "error_count_mean_diff = 0\n", |
| 145 | + "error_count_median_diff = 0\n", |
142 | 146 | "error_count_cliffs_delta = 0\n", |
143 | 147 | "\n", |
144 | 148 | "for i in range(0, reps):\n", |
145 | | - " # print(i) # for debug.\n", |
| 149 | + " print(i) # for debug.\n", |
146 | 150 | " # pick a random seed\n", |
147 | 151 | " rnd_sd = np.random.randint(0, 999999)\n", |
148 | 152 | " load_kwargs = dict(ci=ci, random_seed=rnd_sd)\n", |
|
151 | 155 | " cd = std_diff_data.cohens_d.results\n", |
152 | 156 | " # print(\"cohen's d\") # for debug.\n", |
153 | 157 | " cd_low, cd_high = float(cd.bca_low), float(cd.bca_high)\n", |
154 | | - " if not cd_low < POP_D < cd_high:\n", |
| 158 | + " if cd_low < POP_D < cd_high is False:\n", |
155 | 159 | " error_count_cohens_d += 1\n", |
156 | 160 | "\n", |
157 | 161 | " hg = std_diff_data.hedges_g.results\n", |
158 | 162 | " # print(\"hedges' g\") # for debug.\n", |
159 | 163 | " hg_low, hg_high = float(hg.bca_low), float(hg.bca_high)\n", |
160 | | - " if not hg_low < POP_D < hg_high:\n", |
| 164 | + " if hg_low < POP_D < hg_high is False:\n", |
161 | 165 | " error_count_hedges_g += 1\n", |
162 | 166 | "\n", |
| 167 | + "\n", |
163 | 168 | " mean_diff_data = load(data=mean_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n", |
164 | 169 | " mean_d = mean_diff_data.mean_diff.results\n", |
165 | 170 | " # print(\"mean diff\") # for debug.\n", |
166 | 171 | " mean_d_low, mean_d_high = float(mean_d.bca_low), float(mean_d.bca_high)\n", |
167 | | - " if not mean_d_low < TRUE_DIFFERENCE < mean_d_high:\n", |
| 172 | + " if mean_d_low < TRUE_DIFFERENCE < mean_d_high is False:\n", |
168 | 173 | " error_count_mean_diff += 1\n", |
169 | 174 | "\n", |
170 | | - " median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n", |
| 175 | + "\n", |
| 176 | + " median_diff_data = load(data=median_df, idx=(\"Control\", \"Test\"),\n", |
| 177 | + " **load_kwargs)\n", |
171 | 178 | " median_d = median_diff_data.median_diff.results\n", |
172 | 179 | " # print(\"median diff\") # for debug.\n", |
173 | 180 | " median_d_low, median_d_high = float(median_d.bca_low), float(median_d.bca_high)\n", |
174 | | - " if not median_d_low < MEDIAN_DIFFERENCE < median_d_high:\n", |
| 181 | + " if median_d_low < MEDIAN_DIFFERENCE < median_d_high is False:\n", |
175 | 182 | " error_count_median_diff += 1\n", |
176 | 183 | "\n", |
| 184 | + "\n", |
177 | 185 | " cd_data = load(data=cd_df, idx=(\"Control\", \"Test\"), **load_kwargs)\n", |
178 | 186 | " cliffs = cd_data.cliffs_delta.results\n", |
179 | 187 | " # print(\"cliff's delta\") # for debug.\n", |
180 | 188 | " low, high = float(cliffs.bca_low), float(cliffs.bca_high)\n", |
181 | | - " if not low < 0.5 < high:\n", |
| 189 | + " if low < 0.5 < high is False:\n", |
182 | 190 | " error_count_cliffs_delta += 1\n", |
183 | 191 | "\n", |
184 | 192 | "\n", |
185 | 193 | "max_errors = int(np.ceil(reps * (100 - ci) / 100))\n", |
186 | 194 | "\n", |
187 | | - "assert error_count_cohens_d <= max_errors\n", |
188 | | - "assert error_count_hedges_g <= max_errors\n", |
189 | | - "assert error_count_mean_diff <= max_errors\n", |
190 | | - "assert error_count_median_diff <= max_errors\n", |
191 | | - "assert error_count_cliffs_delta <= max_errors" |
| 195 | + "assert error_count_cohens_d <= max_errors\n", |
| 196 | + "assert error_count_hedges_g <= max_errors\n", |
| 197 | + "assert error_count_mean_diff <= max_errors\n", |
| 198 | + "assert error_count_median_diff <= max_errors\n", |
| 199 | + "assert error_count_cliffs_delta <= max_errors\n" |
192 | 200 | ] |
193 | 201 | }, |
194 | 202 | { |
|
205 | 213 | "display_name": "python3", |
206 | 214 | "language": "python", |
207 | 215 | "name": "python3" |
| 216 | + }, |
| 217 | + "language_info": { |
| 218 | + "name": "python", |
| 219 | + "version": "3.10.12" |
208 | 220 | } |
209 | 221 | }, |
210 | 222 | "nbformat": 4, |
|
0 commit comments