Skip to content

Commit 187c3c5

Browse files
committed
Adjust delta-delta plot for paired data
- Support delta-delta plots for paired data - Make a slight change to the test cases - Restrict delta-delta plots to the mean_diff effect size only
1 parent 2d1713e commit 187c3c5

4 files changed

Lines changed: 112 additions & 61 deletions

File tree

dabest/_api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
88
ci=95, resamples=5000, random_seed=12345, proportional=False,
9-
delta2 = False, experiment = None):
9+
delta2 = False, experiment = None, experiment_label = None,
10+
x1_level = None):
1011
'''
1112
Loads data in preparation for estimation statistics.
1213
@@ -82,4 +83,4 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
8283
'''
8384
from ._classes import Dabest
8485

85-
return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment)
86+
return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level)

dabest/_classes.py

Lines changed: 99 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ class Dabest(object):
99
Class for estimation statistics and plots.
1010
"""
1111

12-
def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
13-
random_seed, proportional, delta2, experiment):
12+
def __init__(self, data, idx, x, y, paired, id_col, ci,
13+
resamples, random_seed, proportional, delta2,
14+
experiment, experiment_label, x1_level):
1415

1516
"""
1617
Parses and stores pandas DataFrames in preparation for estimation
@@ -40,21 +41,65 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
4041

4142

4243

43-
# check if this is a 2x2 ANOVA case and x & y are valid columns:
44+
# check if this is a 2x2 ANOVA case and x & y are valid columns
45+
# create experiment_label and x1_level
4446
if delta2:
47+
# idx should not be specified
48+
if idx:
49+
err0 = '`idx` should not be specified when `delta2` is True.'.format(len(x))
50+
raise ValueError(err0)
51+
52+
# check if x is valid
4553
if len(x) != 2:
4654
err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x))
4755
raise ValueError(err0)
48-
if any(i not in data_in.columns for i in x):
49-
err = 'Not all of {0} is a column in `data`. Please check.'.format(x)
50-
raise IndexError(err)
51-
if y not in data_in.columns:
56+
else:
57+
for i in x:
58+
if i not in data_in.columns:
59+
err = '{0} is a column in `data`. Please check.'.format(i)
60+
raise IndexError(err)
61+
62+
# check if y is valid
63+
if not y:
64+
err0 = '`delta2` is True but `y` is not indicated.'
65+
raise ValueError(err0)
66+
elif y not in data_in.columns:
5267
err = '{0} is not a column in `data`. Please check.'.format(y)
5368
raise IndexError(err)
69+
70+
# check if experiment is valid
5471
if experiment not in data_in.columns:
5572
err = '{0} is not a column in `data`. Please check.'.format(experiment)
5673
raise IndexError(err)
5774

75+
# check if experiment_label is valid and create experiment when needed
76+
if experiment_label:
77+
if len(experiment_label) != 2:
78+
err0 = '`experiment_label` does not have a length of 2.'
79+
raise ValueError(err0)
80+
else:
81+
for i in experiment_label:
82+
if i not in data_in[experiment].unique():
83+
err = '{0} is an element in the column `{1}` of `data`. Please check.'.format(i, experiment)
84+
raise IndexError(err)
85+
else:
86+
experiment_label = data_in[experiment].unique()
87+
88+
# check if x1_level is valid
89+
if x1_level:
90+
if len(x1_level) != 2:
91+
err0 = '`x1_level` does not have a length of 2.'
92+
raise ValueError(err0)
93+
else:
94+
for i in x1_level:
95+
if i not in data_in[x[0]].unique():
96+
err = '{0} is an element in the column `{1}` of `data`. Please check.'.format(i, experiment)
97+
raise IndexError(err)
98+
99+
else:
100+
x1_level = data_in[x[0]].unique()
101+
self.__experiment_label = experiment_label
102+
self.__x1_level = x1_level
58103

59104

60105
# check if idx is specified
@@ -72,26 +117,21 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
72117
data_in[new_col_name] = data_in[x[0]].apply(lambda x: str(x)) + " " + data_in[experiment].apply(lambda x: str(x))
73118

74119
#create idx
75-
experiment = data_in[experiment].unique()
76-
x1_level = data_in[x[0]].unique()
77120
idx = []
78-
for i in experiment:
121+
for i in experiment_label:
79122
temp = []
80123
for j in x1_level:
81124
temp.append(j + " " + i)
82125
idx.append(temp)
83-
self.__idx = idx
84-
self.__first = x1_level
85-
self.__experiment = experiment
126+
self.__idx = idx
127+
self.__x1 = x[0]
128+
self.__x2 = x[1]
86129
# record the second variable and create idx
87-
self.__second = x[1]
88-
x = new_col_name
89-
130+
x = new_col_name
90131
else:
91-
self.__second = None
92-
self.__idx = idx
93-
self.__first = None
94-
self.__experiment = None
132+
self.__idx = idx
133+
self.__x1 = None
134+
self.__x2 = None
95135

96136
# Determine the kind of estimation plot we need to produce.
97137
if all([isinstance(i, str) for i in idx]):
@@ -256,9 +296,9 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
256296
resamples=resamples,
257297
proportional=proportional,
258298
delta2=delta2,
259-
experiment = self.__experiment,
260-
first = self.__first,
261-
second=self.__second)
299+
experiment_label=self.__experiment_label,
300+
x1_level=self.__x1_level,
301+
x2=self.__x2)
262302

263303
self.__mean_diff = EffectSizeDataFrame(self, "mean_diff",
264304
**EffectSizeDataFrame_kwargs)
@@ -328,7 +368,7 @@ def __repr__(self):
328368
comparisons.append("{} minus {}".format(test_name, control_name))
329369

330370
if self.__delta2:
331-
comparison.append("{} minus {}".format(self.__experiment[1], self.__experiment[0]))
371+
comparison.append("{} minus {}".format(self.__experiment_label[1], self.__experiment_label[0]))
332372

333373
for j, g in enumerate(comparisons):
334374
out.append("{}. {}".format(j+1, g))
@@ -573,20 +613,30 @@ def idx(self):
573613

574614

575615
@property
576-
def first(self):
577-
return self.__first
616+
def x1(self):
617+
return self.__x1
578618

579619

580620
@property
581-
def second(self):
582-
return self.__second
621+
def x1_level(self):
622+
return self.__x1_level
623+
624+
625+
@property
626+
def x2(self):
627+
return self.__x2
583628

584629

585630
@property
586631
def experiment(self):
587632
return self.__experiment
588633

589634

635+
@property
636+
def experiment_label(self):
637+
return self.__experiment_label
638+
639+
590640
@property
591641
def delta2(self):
592642
return self.__delta2
@@ -705,7 +755,7 @@ def __init__(self, control, test, effect_size,
705755
resamples=5000,
706756
permutation_count=5000,
707757
random_seed=12345,
708-
first=None, delta2=False):
758+
delta2=False):
709759

710760
"""
711761
Compute the effect size between two groups.
@@ -870,7 +920,6 @@ def __init__(self, control, test, effect_size,
870920
self.__ci = ci
871921
self.__alpha = ci2g._compute_alpha_from_ci(ci)
872922
self.__delta2 = delta2
873-
self.__first = first
874923

875924
self.__difference = es.two_group_difference(
876925
control, test, is_paired, effect_size)
@@ -1485,8 +1534,8 @@ def __init__(self, dabest, effect_size,
14851534
resamples=5000,
14861535
permutation_count=5000,
14871536
random_seed=12345,
1488-
first=None, second=None,
1489-
delta2=False, experiment=None):
1537+
x1_level=None, x2=None,
1538+
delta2=False, experiment_label=None):
14901539
"""
14911540
Parses the data from a Dabest object, enabling plotting and printing
14921541
capability for the effect size of interest.
@@ -1500,10 +1549,10 @@ def __init__(self, dabest, effect_size,
15001549
self.__permutation_count = permutation_count
15011550
self.__random_seed = random_seed
15021551
self.__proportional = proportional
1503-
self.__first = first
1504-
self.__experiment = experiment
1505-
self.__second = second
1506-
self.__delta2 = delta2
1552+
self.__x1_level = x1_level
1553+
self.__experiment_label = experiment_label
1554+
self.__x2 = x2
1555+
self.__delta2 = delta2
15071556

15081557

15091558
def __pre_calc(self):
@@ -1558,7 +1607,7 @@ def __pre_calc(self):
15581607

15591608
reprs.append(text_repr)
15601609

1561-
if self.__delta2:
1610+
if self.__delta2 and self.__effect_size == "mean_diff":
15621611
delta = TwoGroupsEffectSize(out[0]["bootstraps"],
15631612
out[1]["bootstraps"],
15641613
self.__effect_size,
@@ -1567,19 +1616,20 @@ def __pre_calc(self):
15671616
self.__resamples,
15681617
self.__permutation_count,
15691618
self.__random_seed,
1570-
self.__first,
15711619
self.__delta2
15721620
)
15731621
r_dict = delta.to_dict()
1574-
r_dict["control"] = self.__experiment[1]
1575-
r_dict["test"] = self.__experiment[0]
1622+
r_dict["control"] = self.__experiment_label[0]
1623+
r_dict["test"] = self.__experiment_label[1]
15761624
r_dict["control_N"] = self.__resamples
15771625
r_dict["test_N"] = self.__resamples
15781626
out.append(r_dict)
1579-
to_replace = "between {} and {} is".format(self.__experiment[0], self.__experiment[1])
1627+
to_replace = "between {} and {} is".format(self.__experiment_label[0], self.__experiment_label[1])
15801628
text_repr = text_repr.replace("is", to_replace, 1)
15811629
reprs.append(text_repr)
1582-
1630+
else:
1631+
err0 = 'The calculation of delta-delta is not supported for {}.'.format(self.__effect_size)
1632+
raise ValueError(err0)
15831633
varname = get_varname(self.__dabest_obj)
15841634
lastline = "To get the results of all valid statistical tests, " +\
15851635
"use `{}.{}.statistical_tests`".format(varname, self.__effect_size)
@@ -1626,7 +1676,6 @@ def __pre_calc(self):
16261676

16271677
self.__results = out_.reindex(columns=columns_in_order)
16281678
self.__results.dropna(axis="columns", how="all", inplace=True)
1629-
16301679

16311680

16321681

@@ -1906,7 +1955,7 @@ def plot(self, color_col=None,
19061955
self.__pre_calc()
19071956

19081957
if self.__delta2:
1909-
color_col = self.__second
1958+
color_col = self.__x2
19101959

19111960
if self.__proportional:
19121961
raw_marker_size = 0.01
@@ -1991,18 +2040,18 @@ def ci(self):
19912040
return self.__ci
19922041

19932042
@property
1994-
def first(self):
1995-
return self.__first
2043+
def x1_level(self):
2044+
return self.__x1_level
19962045

19972046

19982047
@property
1999-
def second(self):
2000-
return self.__second
2048+
def x2(self):
2049+
return self.__x2
20012050

20022051

20032052
@property
2004-
def experiment(self):
2005-
return self.__experiment
2053+
def experiment_label(self):
2054+
return self.__experiment_label
20062055

20072056

20082057
@property

dabest/plotter.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
6565
yvar = EffectSizeDataFrame.yvar
6666
is_paired = EffectSizeDataFrame.is_paired
6767
delta2 = EffectSizeDataFrame.delta2
68-
first = EffectSizeDataFrame.first
69-
experiment = EffectSizeDataFrame.experiment
7068

7169
all_plot_groups = dabest_obj._all_plot_groups
7270
idx = dabest_obj.idx
@@ -363,20 +361,20 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
363361
current_pair = pivoted_plot_data
364362
else:
365363
current_pair = pivoted_plot_data[yvar]
366-
367364
grp_count = len(current_tuple)
368365
# Iterate through the data for the current tuple.
369366
for ID, observation in current_pair.iterrows():
370367
x_points = [t for t in range(x_start,x_start+grp_count)]
371368
y_points = observation.tolist()
372-
369+
373370
if color_col is None:
374371
slopegraph_kwargs['color'] = ytick_color
375372
else:
376373
color_key = pivoted_plot_data[color_col,
377374
current_tuple[0]].loc[ID]
378-
slopegraph_kwargs['color'] = plot_palette_raw[color_key]
379-
slopegraph_kwargs['label'] = color_key
375+
if not pd.isna(color_key):
376+
slopegraph_kwargs['color'] = plot_palette_raw[color_key]
377+
slopegraph_kwargs['label'] = color_key
380378

381379
rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs)
382380
x_start = x_start + grp_count
@@ -454,7 +452,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
454452
ticks_to_plot = np.arange(1, len(temp_all_plot_groups), 2).tolist()
455453
ticks_to_skip_contrast = np.cumsum([(len(t)-1)*2 for t in idx])[:-1].tolist()
456454
ticks_to_skip_contrast.insert(0, 0)
457-
455+
if delta2:
456+
ticks_to_skip_contrast.append(max(ticks_to_skip_contrast)+2)
458457
else:
459458
ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()
460459
ticks_to_skip.insert(0, 0)
@@ -529,7 +528,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
529528

530529
if float_contrast is True:
531530
contrast_axes.set_xlim(0.5, 1.5)
532-
elif delta2:
531+
elif not is_paired and delta2:
533532
temp = rawdata_axes.get_xlim()
534533
contrast_axes.set_xlim(temp[0], temp[1]+2)
535534
rawdata_axes.set_xlim(temp[0], temp[1]+2)

dabest/tests/test_04_repeated_measures_effsizes_pvals copy.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@
3737

3838
# kwargs for Dabest class init.
3939
dabest_default_kwargs = dict(x=None, y=None, ci=95,
40-
resamples=5000, random_seed=12345, proportional=False)
40+
resamples=5000, random_seed=12345, proportional=False,
41+
delta2 = False, experiment=None,
42+
experiment_label=None, x1_level=None)
4143

4244
# example of sequential repeated measures
4345
sequential = Dabest(df, id_col = "ID",

0 commit comments

Comments
 (0)