Add extra delta-delta violin to 2x2 plot

LI-Yixuan · LI-Yixuan · commit 2d1713eb3f98 · 2021-07-06T22:51:59.000+08:00
- Add extra delta-delta violin to 2x2 plot
diff --git a/.gitignore b/.gitignore
@@ -127,3 +127,4 @@ real.py
 0to2_beforeduringafter.csv
 test.ipynb
 TrhCsCh.csv
+*.py
diff --git a/dabest/_api.py b/dabest/_api.py
@@ -5,7 +5,8 @@
 
 
 def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
-        ci=95, resamples=5000, random_seed=12345, proportional=False, var2 = False, status = None):
+        ci=95, resamples=5000, random_seed=12345, proportional=False, 
+        delta2 = False, experiment = None):
     '''
     Loads data in preparation for estimation statistics.
 
@@ -18,7 +19,10 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
         List of column names (if 'x' is not supplied) or of category names
         (if 'x' is supplied). This can be expressed as a tuple of tuples,
         with each individual tuple producing its own contrast plot
-    x : string, default None
+    x : string or list, default None
+        Column name(s) of the independent variable(s). This must be a list
+        of exactly 2 column names if 'delta2' is True; otherwise it must be
+        a single string.
     y : string, default None
         Column names for data to be plotted on the x-axis and y-axis.
     paired : string, default None
@@ -37,6 +41,19 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
         reported are replicable.
     proportional : boolean, default False. 
         TO INCLUDE MORE DESCRIPTION ABOUT DATA FORMAT
+    delta2 : boolean, default False
+        Whether this is a delta-delta (2x2) experiment. If True, 'x' must
+        name exactly two columns and 'experiment' must be a column in 'data'.
+    experiment : string, default None
+        Name of the column in 'data' that contains the experiment labels.
+    experiment_lab : list, default None
+        A list of strings giving the order of the experiment labels, i.e.
+        the order of the subplots in a delta-delta plot. This applies only
+        when 'delta2' is True.
+    x1_level : list, default None
+        A list of strings giving the order of the levels of the first 'x'
+        variable in a delta-delta plot. This applies only when 'delta2'
+        is True.
 
     Returns
     -------
@@ -65,4 +82,4 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None,
     '''
     from ._classes import Dabest
 
-    return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, var2, status)
+    return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment)
diff --git a/dabest/_classes.py b/dabest/_classes.py
@@ -10,7 +10,7 @@ class Dabest(object):
     """
 
     def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
-                random_seed, proportional, var2, status):
+                random_seed, proportional, delta2, experiment):
 
         """
         Parses and stores pandas DataFrames in preparation for estimation
@@ -23,8 +23,8 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
         import pandas as pd
         import seaborn as sns
 
-        self.__var2         = var2
-        self.__status       = status
+        self.__delta2       = delta2
+        self.__experiment   = experiment
         self.__ci           = ci
         self.__data         = data
         self.__id_col       = id_col
@@ -41,38 +41,38 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
 
 
         # check if this is a 2x2 ANOVA case and x & y are valid columns:
-        if var2:
+        if delta2:
             if len(x) != 2:
-                err0 = '`var2` is True but the number of variables indicated by `x` is {}.'.format(len(x))
+                err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x))
                 raise ValueError(err0)
             if any(i not in data_in.columns for i in x):
                 err = 'Not all of {0} is a column in `data`. Please check.'.format(x)
                 raise IndexError(err)
             if y not in data_in.columns:
                 err = '{0} is not a column in `data`. Please check.'.format(y)
                 raise IndexError(err)
-            if status not in data_in.columns:
-                err = '{0} is not a column in `data`. Please check.'.format(status)
+            if experiment not in data_in.columns:
+                err = '{0} is not a column in `data`. Please check.'.format(experiment)
                 raise IndexError(err)
 
 
 
         # check if idx is specified
-        if not var2 and not idx:
+        if not delta2 and not idx:
             err = '`idx` is not a column in `data`. Please check.'
             raise IndexError(err)
 
 
         # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case
-        if var2:
-            # add a new column which is a combination of experiment status and the first variable
-            new_col_name = status+x[0]
+        if delta2:
+            # add a new column which is a combination of experiment and the first variable
+            new_col_name = experiment+x[0]
             while new_col_name in data_in.columns:
                 new_col_name += "_"
-            data_in[new_col_name] = data_in[x[0]].apply(lambda x: str(x)) + " " + data_in[status].apply(lambda x: str(x))
+            data_in[new_col_name] = data_in[x[0]].apply(lambda x: str(x)) + " " + data_in[experiment].apply(lambda x: str(x))
 
             #create idx            
-            experiment = data_in[status].unique()
+            experiment = data_in[experiment].unique()
             x1_level = data_in[x[0]].unique()
             idx = []
             for i in experiment:
@@ -91,7 +91,7 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
             self.__second     = None
             self.__idx        = idx
             self.__first      = None
-            self.__experiment = experiment
+            self.__experiment = None
 
         # Determine the kind of estimation plot we need to produce.
         if all([isinstance(i, str) for i in idx]):
@@ -254,7 +254,11 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
         EffectSizeDataFrame_kwargs = dict(ci=ci, is_paired=paired,
                                            random_seed=random_seed,
                                            resamples=resamples,
-                                           proportional=proportional, var2=var2, second=self.__second)
+                                           proportional=proportional, 
+                                           delta2=delta2, 
+                                           experiment = self.__experiment,
+                                           first = self.__first,
+                                           second=self.__second)
 
         self.__mean_diff    = EffectSizeDataFrame(self, "mean_diff",
                                                 **EffectSizeDataFrame_kwargs)
@@ -323,6 +327,9 @@ def __repr__(self):
                 for ix, test_name in enumerate(current_tuple[1:]):
                     comparisons.append("{} minus {}".format(test_name, control_name))
 
+        if self.__delta2:
+            comparison.append("{} minus {}".format(self.__experiment[1], self.__experiment[0]))
+        
         for j, g in enumerate(comparisons):
             out.append("{}. {}".format(j+1, g))
 
@@ -565,16 +572,26 @@ def idx(self):
         return self.__idx
     
 
+    @property
+    def first(self):
+        return self.__first
+
+
     @property
     def second(self):
         return self.__second
 
 
     @property
-    def var2(self):
-        return self.__var2
+    def experiment(self):
+        return self.__experiment
     
 
+    @property
+    def delta2(self):
+        return self.__delta2
+
+
     @property
     def is_paired(self):
         """
@@ -687,7 +704,8 @@ def __init__(self, control, test, effect_size,
                  is_paired=None, ci=95,
                  resamples=5000, 
                  permutation_count=5000, 
-                 random_seed=12345):
+                 random_seed=12345,
+                 first=None, delta2=False):
 
         """
         Compute the effect size between two groups.
@@ -851,20 +869,24 @@ def __init__(self, control, test, effect_size,
         self.__random_seed       = random_seed
         self.__ci                = ci
         self.__alpha             = ci2g._compute_alpha_from_ci(ci)
-
+        self.__delta2              = delta2
+        self.__first             = first
 
         self.__difference = es.two_group_difference(
                                 control, test, is_paired, effect_size)
-
+        
         self.__jackknives = ci2g.compute_meandiff_jackknife(
                                 control, test, is_paired, effect_size)
 
         self.__acceleration_value = ci2g._calc_accel(self.__jackknives)
 
-        bootstraps = ci2g.compute_bootstrapped_diff(
+        if not delta2:
+            bootstraps = ci2g.compute_bootstrapped_diff(
                             control, test, is_paired, effect_size,
                             resamples, random_seed)
-        self.__bootstraps = npsort(bootstraps)
+            self.__bootstraps = npsort(bootstraps)
+        else:
+            self.__bootstraps = npsort(self.__test-self.__control)
         
         # Added in v0.2.6.
         # Raises a UserWarning if there are any infiinities in the bootstraps.
@@ -924,14 +946,19 @@ def __init__(self, control, test, effect_size,
                 self.__bca_high  = self.__difference
                 warnings.warn(err_temp.substitute(lim_type="upper"),
                               stacklevel=0)
+        if not self.__delta2:
+            # Compute percentile intervals.
+            pct_idx_low  = int((self.__alpha/2)     * resamples)
+            pct_idx_high = int((1-(self.__alpha/2)) * resamples)
 
-        # Compute percentile intervals.
-        pct_idx_low  = int((self.__alpha/2)     * resamples)
-        pct_idx_high = int((1-(self.__alpha/2)) * resamples)
-
-        self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
-        self.__pct_low  = self.__bootstraps[pct_idx_low]
-        self.__pct_high = self.__bootstraps[pct_idx_high]
+            self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
+            self.__pct_low  = self.__bootstraps[pct_idx_low]
+            self.__pct_high = self.__bootstraps[pct_idx_high]
+        
+        else:
+            self.__pct_interval_idx = None
+            self.__pct_low  = None
+            self.__pct_high = None
 
         # Perform statistical tests.
                 
@@ -1067,7 +1094,10 @@ def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):
                       "es"           : self.__EFFECT_SIZE_DICT[self.__effect_size],
                       "paired_status": PAIRED_STATUS[str(self.__is_paired)]}
         
-        out1 = "The {paired_status} {es} {rm_status}".format(**first_line)
+        if self.__delta2:
+            out1 = "The delta-delta {es}"
+        else:
+            out1 = "The {paired_status} {es} {rm_status}".format(**first_line)
         
         base_string_fmt = "{:." + str(sigfig) + "}"
         if "." in str(self.__ci):
@@ -1454,7 +1484,9 @@ def __init__(self, dabest, effect_size,
                  is_paired, ci=95, proportional=False,
                  resamples=5000, 
                  permutation_count=5000,
-                 random_seed=12345, second=None, var2=False):
+                 random_seed=12345, 
+                 first=None, second=None, 
+                 delta2=False, experiment=None):
         """
         Parses the data from a Dabest object, enabling plotting and printing
         capability for the effect size of interest.
@@ -1468,8 +1500,10 @@ def __init__(self, dabest, effect_size,
         self.__permutation_count = permutation_count
         self.__random_seed       = random_seed
         self.__proportional      = proportional
+        self.__first             = first
+        self.__experiment        = experiment 
         self.__second            = second
-        self.__var2              = var2 
+        self.__delta2              = delta2 
 
 
     def __pre_calc(self):
@@ -1524,6 +1558,28 @@ def __pre_calc(self):
 
                 reprs.append(text_repr)
 
+        if self.__delta2:
+            delta = TwoGroupsEffectSize(out[0]["bootstraps"], 
+                                    out[1]["bootstraps"],
+                                    self.__effect_size,
+                                    True,
+                                    self.__ci,
+                                    self.__resamples,
+                                    self.__permutation_count,
+                                    self.__random_seed,
+                                    self.__first,
+                                    self.__delta2
+                                    )
+            r_dict = delta.to_dict()
+            r_dict["control"]   = self.__experiment[1]
+            r_dict["test"]      = self.__experiment[0]
+            r_dict["control_N"] = self.__resamples
+            r_dict["test_N"]    = self.__resamples
+            out.append(r_dict)
+            to_replace = "between {} and {} is".format(self.__experiment[0], self.__experiment[1])
+            text_repr = text_repr.replace("is", to_replace, 1)
+            reprs.append(text_repr)
+
         varname = get_varname(self.__dabest_obj)
         lastline = "To get the results of all valid statistical tests, " +\
         "use `{}.{}.statistical_tests`".format(varname, self.__effect_size)
@@ -1849,7 +1905,7 @@ def plot(self, color_col=None,
         if hasattr(self, "results") is False:
             self.__pre_calc()
 
-        if self.__var2:
+        if self.__delta2:
             color_col = self.__second
 
         if self.__proportional:
@@ -1934,14 +1990,24 @@ def ci(self):
         """
         return self.__ci
 
+    @property
+    def first(self):
+        return self.__first
+
+
     @property
     def second(self):
         return self.__second
 
 
     @property
-    def var2(self):
-        return self.__var2
+    def experiment(self):
+        return self.__experiment
+    
+
+    @property
+    def delta2(self):
+        return self.__delta2
     
 
     @property
diff --git a/dabest/plotter.py b/dabest/plotter.py