Skip to content

Commit 3de867f

Browse files
authored
Merge pull request #192 from ACCLAB/feat-numba
Adding support for Numba
2 parents 7c2e9c9 + 93d1547 commit 3de867f

132 files changed

Lines changed: 534 additions & 290 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

dabest/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
from ._api import load, prop_dataset
22
from ._stats_tools import effsize as effsize
3+
from ._stats_tools import confint_2group_diff as ci_2g
34
from ._effsize_objects import TwoGroupsEffectSize, PermutationTest
45
from ._dabest_object import Dabest
56

6-
__version__ = "2024.03.29"
7+
8+
import os
9+
if os.environ.get('SKIP_NUMBA_COMPILE') != '1':
10+
from ._stats_tools.precompile import precompile_all, _NUMBA_COMPILED
11+
if not _NUMBA_COMPILED:
12+
precompile_all()
13+
14+
__version__ = "2024.03.30"

dabest/_bootstrap_tools.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ def __init__(
6666
reps: int = 5000, # Number of bootstrap iterations to perform.
6767
):
6868
# Turn to pandas series.
69-
x1 = pd.Series(x1).dropna()
69+
# x1 = pd.Series(x1).dropna()
70+
x1 = x1[~np.isnan(x1)]
71+
7072
diff = False
7173

7274
# Initialise stat_function
@@ -89,7 +91,9 @@ def __init__(
8991
if x2 is None:
9092
raise ValueError("Please specify x2.")
9193

92-
x2 = pd.Series(x2).dropna()
94+
# x2 = pd.Series(x2).dropna()
95+
x2 = x2[~np.isnan(x2)]
96+
9397
if len(x1) != len(x2):
9498
raise ValueError("x1 and x2 are not the same length.")
9599

@@ -134,7 +138,8 @@ def __init__(
134138

135139
elif x2 is not None and paired is None:
136140
diff = True
137-
x2 = pd.Series(x2).dropna()
141+
# x2 = pd.Series(x2).dropna()
142+
x2 = x2[~np.isnan(x2)]
138143
# Generate statarrays for both arrays.
139144
ref_statarray = sns.algorithms.bootstrap(x1, **sns_bootstrap_kwargs)
140145
exp_statarray = sns.algorithms.bootstrap(x2, **sns_bootstrap_kwargs)

dabest/_dabest_object.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def __init__(
112112
# Determine the kind of estimation plot we need to produce.
113113
if all([isinstance(i, (str, int, float)) for i in idx]):
114114
# flatten out idx.
115-
all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()
115+
all_plot_groups = pd.Series([t for t in idx]).unique().tolist()
116116
if len(idx) > len(all_plot_groups):
117117
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
118118
raise ValueError(err0)
@@ -122,7 +122,7 @@ def __init__(
122122
self.__idx = (idx,)
123123

124124
elif all([isinstance(i, (tuple, list)) for i in idx]):
125-
all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()
125+
all_plot_groups = pd.Series([tt for t in idx for tt in t]).unique().tolist()
126126

127127
actual_groups_given = sum([len(i) for i in idx])
128128

dabest/_delta_objects.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -388,13 +388,14 @@ def __init__(self, effectsizedataframe, permutation_count,
388388
# compute the variances of each control group and each test group
389389
control_var=[]
390390
test_var=[]
391+
grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}
391392
for j, current_tuple in enumerate(idx):
392393
cname = current_tuple[0]
393-
control = dat[dat[xvar] == cname][yvar].copy()
394+
control = grouped_data[cname]
394395
control_var.append(np.var(control, ddof=1))
395396

396397
tname = current_tuple[1]
397-
test = dat[dat[xvar] == tname][yvar].copy()
398+
test = grouped_data[tname]
398399
test_var.append(np.var(test, ddof=1))
399400
self.__control_var = np.array(control_var)
400401
self.__test_var = np.array(test_var)
@@ -414,7 +415,7 @@ def __init__(self, effectsizedataframe, permutation_count,
414415
self.__bootstraps)
415416

416417
# Compute the weighted average mean difference based on the raw data
417-
self.__difference = es.weighted_delta(self.__effsizedf["difference"],
418+
self.__difference = es.weighted_delta(np.array(self.__effsizedf["difference"]),
418419
self.__group_var)
419420

420421
sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)

dabest/_effsize_objects.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pandas as pd
1010
import lqrt
1111
from scipy.stats import norm
12+
import numpy as np
1213
from numpy import array, isnan, isinf, repeat, random, isin, abs, var
1314
from numpy import sort as npsort
1415
from numpy import nan as npnan
@@ -357,12 +358,17 @@ def _perform_statistical_test(self):
357358
# References:
358359
# https://en.wikipedia.org/wiki/McNemar%27s_test
359360

360-
df_temp = pd.DataFrame({"control": self.__control, "test": self.__test})
361-
x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)])
362-
x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)])
363-
x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)])
364-
x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)])
365-
table = [[x1, x2], [x3, x4]]
361+
# df_temp = pd.DataFrame({"control": self.__control, "test": self.__test})
362+
# x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)])
363+
# x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)])
364+
# x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)])
365+
# x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)])
366+
# table = [[x1, x2], [x3, x4]]
367+
x1 = np.sum((self.__control == 0) & (self.__test == 0))
368+
x2 = np.sum((self.__control == 0) & (self.__test == 1))
369+
x3 = np.sum((self.__control == 1) & (self.__test == 0))
370+
x4 = np.sum((self.__control == 1) & (self.__test == 1))
371+
table = np.array([[x1, x2], [x3, x4]])
366372
_mcnemar = mcnemar(table, exact=True, correction=True)
367373
self.__pvalue_mcnemar = _mcnemar.pvalue
368374
self.__statistic_mcnemar = _mcnemar.statistic
@@ -861,18 +867,19 @@ def __pre_calc(self):
861867
out = []
862868
reprs = []
863869

870+
grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}
864871
if self.__delta2:
865872
mixed_data = []
866873
for j, current_tuple in enumerate(idx):
867874
if self.__is_paired != "sequential":
868875
cname = current_tuple[0]
869-
control = dat[dat[xvar] == cname][yvar].copy()
876+
control = grouped_data[cname]
870877

871878
for ix, tname in enumerate(current_tuple[1:]):
872879
if self.__is_paired == "sequential":
873880
cname = current_tuple[ix]
874-
control = dat[dat[xvar] == cname][yvar].copy()
875-
test = dat[dat[xvar] == tname][yvar].copy()
881+
control = grouped_data[cname]
882+
test = grouped_data[tname]
876883
mixed_data.append(control)
877884
mixed_data.append(test)
878885
bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(
@@ -888,13 +895,13 @@ def __pre_calc(self):
888895
for j, current_tuple in enumerate(idx):
889896
if self.__is_paired != "sequential":
890897
cname = current_tuple[0]
891-
control = dat[dat[xvar] == cname][yvar].copy()
898+
control = grouped_data[cname]
892899

893900
for ix, tname in enumerate(current_tuple[1:]):
894901
if self.__is_paired == "sequential":
895902
cname = current_tuple[ix]
896-
control = dat[dat[xvar] == cname][yvar].copy()
897-
test = dat[dat[xvar] == tname][yvar].copy()
903+
control = grouped_data[cname]
904+
test = grouped_data[tname]
898905

899906
result = TwoGroupsEffectSize(
900907
control,
@@ -1055,16 +1062,18 @@ def __calc_lqrt(self):
10551062

10561063
out = []
10571064

1065+
grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}
1066+
10581067
for j, current_tuple in enumerate(db_obj.idx):
10591068
if self.__is_paired != "sequential":
10601069
cname = current_tuple[0]
1061-
control = dat[dat[xvar] == cname][yvar].copy()
1070+
control = grouped_data[cname]
10621071

10631072
for ix, tname in enumerate(current_tuple[1:]):
10641073
if self.__is_paired == "sequential":
10651074
cname = current_tuple[ix]
1066-
control = dat[dat[xvar] == cname][yvar].copy()
1067-
test = dat[dat[xvar] == tname][yvar].copy()
1075+
control = grouped_data[cname]
1076+
test = grouped_data[tname]
10681077

10691078
if self.__is_paired:
10701079
# Refactored here in v0.3.0 for performance issues.

dabest/_modidx.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
'dabest/_stats_tools/confint_2group_diff.py'),
2626
'dabest._stats_tools.confint_2group_diff._create_two_group_jackknife_indexes': ( 'API/confint_2group_diff.html#_create_two_group_jackknife_indexes',
2727
'dabest/_stats_tools/confint_2group_diff.py'),
28+
'dabest._stats_tools.confint_2group_diff.bootstrap_indices': ( 'API/confint_2group_diff.html#bootstrap_indices',
29+
'dabest/_stats_tools/confint_2group_diff.py'),
2830
'dabest._stats_tools.confint_2group_diff.calculate_group_var': ( 'API/confint_2group_diff.html#calculate_group_var',
2931
'dabest/_stats_tools/confint_2group_diff.py'),
3032
'dabest._stats_tools.confint_2group_diff.calculate_weighted_delta': ( 'API/confint_2group_diff.html#calculate_weighted_delta',
@@ -42,11 +44,17 @@
4244
'dabest._stats_tools.confint_2group_diff.create_jackknife_indexes': ( 'API/confint_2group_diff.html#create_jackknife_indexes',
4345
'dabest/_stats_tools/confint_2group_diff.py'),
4446
'dabest._stats_tools.confint_2group_diff.create_repeated_indexes': ( 'API/confint_2group_diff.html#create_repeated_indexes',
45-
'dabest/_stats_tools/confint_2group_diff.py')},
46-
'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor',
47+
'dabest/_stats_tools/confint_2group_diff.py'),
48+
'dabest._stats_tools.confint_2group_diff.delta2_bootstrap_loop': ( 'API/confint_2group_diff.html#delta2_bootstrap_loop',
49+
'dabest/_stats_tools/confint_2group_diff.py')},
50+
'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._cliffs_delta_core': ( 'API/effsize.html#_cliffs_delta_core',
51+
'dabest/_stats_tools/effsize.py'),
52+
'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor',
4753
'dabest/_stats_tools/effsize.py'),
4854
'dabest._stats_tools.effsize._compute_standardizers': ( 'API/effsize.html#_compute_standardizers',
4955
'dabest/_stats_tools/effsize.py'),
56+
'dabest._stats_tools.effsize._mann_whitney_u': ( 'API/effsize.html#_mann_whitney_u',
57+
'dabest/_stats_tools/effsize.py'),
5058
'dabest._stats_tools.effsize.cliffs_delta': ( 'API/effsize.html#cliffs_delta',
5159
'dabest/_stats_tools/effsize.py'),
5260
'dabest._stats_tools.effsize.cohens_d': ( 'API/effsize.html#cohens_d',
@@ -61,6 +69,8 @@
6169
'dabest/_stats_tools/effsize.py'),
6270
'dabest._stats_tools.effsize.weighted_delta': ( 'API/effsize.html#weighted_delta',
6371
'dabest/_stats_tools/effsize.py')},
72+
'dabest._stats_tools.precompile': { 'dabest._stats_tools.precompile.precompile_all': ( 'API/precompile.html#precompile_all',
73+
'dabest/_stats_tools/precompile.py')},
6474
'dabest.forest_plot': { 'dabest.forest_plot.extract_plot_data': ( 'API/forest_plot.html#extract_plot_data',
6575
'dabest/forest_plot.py'),
6676
'dabest.forest_plot.forest_plot': ('API/forest_plot.html#forest_plot', 'dabest/forest_plot.py'),

0 commit comments

Comments
 (0)