Merge branch 'sankey'

Jacobluke- · Jacobluke- · commit c34638f94685 · 2023-02-25T11:16:58.000+08:00
diff --git a/dabest/_bootstrap_tools.py b/dabest/_bootstrap_tools.py
@@ -17,8 +17,8 @@ class bootstrap:
             the two groups (x2-x1) is computed.
             NaNs are automatically discarded.
 
-        paired: boolean, default False
-            Whether or not x1 and x2 are paired samples.
+        paired : string, default None
+            The pairing type ('baseline' or 'sequential'), or None if x1 and x2 are unpaired.
 
         statfunction: callable, default np.mean
             The summary statistic called on data.
@@ -47,8 +47,8 @@ class bootstrap:
             Whether or not the summary is the difference between two groups.
             If False, only x1 was supplied.
 
-        is_paired: boolean
-            Whether or not the difference reported is between 2 paired groups.
+        is_paired : string, default None
+            The pairing type ('baseline' or 'sequential'), or None if the groups are unpaired.
 
         statistic: callable
             The function used to compute the summary.
@@ -85,19 +85,19 @@ class bootstrap:
 
         pvalue_2samp_ind_ttest: float
             P-value obtained from scipy.stats.ttest_ind.
-            If a single array was given (x1 only), or if `paired` is True,
+            If a single array was given (x1 only), or if `paired` is not None,
             returns 'NIL'.
             See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.stats.ttest_ind.html
 
         pvalue_2samp_related_ttest: float
             P-value obtained from scipy.stats.ttest_rel.
-            If a single array was given (x1 only), or if `paired` is False,
+            If a single array was given (x1 only), or if `paired` is None,
             returns 'NIL'.
             See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.stats.ttest_rel.html
 
         pvalue_wilcoxon: float
             P-value obtained from scipy.stats.wilcoxon.
-            If a single array was given (x1 only), or if `paired` is False,
+            If a single array was given (x1 only), or if `paired` is None,
             returns 'NIL'.
             The Wilcoxons signed-rank test is a nonparametric paired test of
             the null hypothesis that the related samples x1 and x2 are from
@@ -113,7 +113,7 @@ class bootstrap:
 
     '''
     def __init__(self, x1, x2=None,
-        paired=False,
+        paired=None,
         statfunction=None,
         smoothboot=False,
         alpha_level=0.05,
@@ -155,7 +155,7 @@ def __init__(self, x1, x2=None,
                 if len(x1) != len(x2):
                     raise ValueError('x1 and x2 are not the same length.')
 
-        if (x2 is None) or (paired is True) :
+        if (x2 is None) or (paired is not None) :
 
             if x2 is None:
                 tx = x1
@@ -165,7 +165,7 @@ def __init__(self, x1, x2=None,
                 ttest_2_paired = 'NIL'
                 wilcoxonresult = 'NIL'
 
-            elif paired is True:
+            elif paired is not None:
                 diff = True
                 tx = x2 - x1
                 ttest_single = 'NIL'
@@ -188,7 +188,7 @@ def __init__(self, x1, x2=None,
             pct_low_high = np.nan_to_num(pct_low_high).astype('int')
 
 
-        elif x2 is not None and paired is False:
+        elif x2 is not None and paired is None:
             diff = True
             x2 = pd.Series(x2).dropna()
             # Generate statarrays for both arrays.
@@ -268,7 +268,7 @@ def __repr__(self):
         else:
             stat = self.statistic
 
-        diff_types = {True: 'paired', False: 'unpaired'}
+        diff_types = {'sequential': 'paired', 'baseline': 'paired', None: 'unpaired'}
         if self.is_difference:
             a = 'The {} {} difference is {}.'.format(diff_types[self.is_paired],
                     stat, self.summary)
diff --git a/dabest/_classes.py b/dabest/_classes.py
@@ -2548,7 +2548,9 @@ def __pre_calc(self):
                             'proportional_difference'
                            ]
         self.__results   = out_.reindex(columns=columns_in_order)
-        self.__results.dropna(axis="columns", how="all", inplace=True)
+        # Drop all-NaN columns, but always keep is_paired: it is legitimately None/NaN for unpaired data.
+        all_nan = self.__results.drop(columns='is_paired', errors='ignore').isna().all()
+        self.__results.drop(columns=all_nan[all_nan].index, inplace=True)
 
         if self.__delta2 is True and self.__effect_size == "mean_diff":
             self.__delta_delta = DeltaDelta(self,
diff --git a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py
@@ -14,7 +14,7 @@
 """
 
 
-def two_group_difference(control, test, is_paired=False,
+def two_group_difference(control, test, is_paired=None,
                         effect_size="mean_diff"):
     """
     Computes the following metrics for control and test:
@@ -33,8 +33,8 @@ def two_group_difference(control, test, is_paired=False,
     control, test: list, tuple, or ndarray.
         Accepts lists, tuples, or numpy ndarrays of numeric types.
 
-    is_paired: boolean, default False.
-        If True, returns the paired Cohen's d.
+    is_paired: string, default None.
+        If not None, returns the paired Cohen's d.
 
     effect_size: string, default "mean_diff"
         Any one of the following effect sizes:
@@ -97,8 +97,8 @@ def two_group_difference(control, test, is_paired=False,
         return hedges_g(control, test, is_paired)
 
     elif effect_size == "cliffs_delta":
-        if is_paired is True:
-            err1 = "`is_paired` is True; therefore Cliff's delta is not defined."
+        if is_paired:
+            err1 = "`is_paired` is not None; therefore Cliff's delta is not defined."
             raise ValueError(err1)
         else:
             return cliffs_delta(control, test)
@@ -116,9 +116,9 @@ def func_difference(control, test, func, is_paired):
 
         func: summary function to apply.
 
-        is_paired: boolean.
-            If True, computes func(test - control).
-            If False, computes func(test) - func(control).
+        is_paired: string.
+            If not None, computes func(test - control).
+            If None, computes func(test) - func(control).
 
     Returns:
     --------
@@ -159,7 +159,7 @@ def func_difference(control, test, func, is_paired):
 
 
 
-def cohens_d(control, test, is_paired=False):
+def cohens_d(control, test, is_paired=None):
     """
     Computes Cohen's d for test v.s. control.
     See https://en.wikipedia.org/wiki/Effect_size#Cohen's_d
@@ -168,16 +168,15 @@ def cohens_d(control, test, is_paired=False):
     --------
     control, test: List, tuple, or array.
 
-    is_paired: boolean, default False
-        If True, the paired Cohen's d is returned.
+    is_paired: string, default None
+        If not None, the paired Cohen's d is returned.
 
     Returns
     -------
         d: float.
-            If is_paired is False, this is equivalent to:
+            If is_paired is None, this is equivalent to:
             (numpy.mean(test) - numpy.mean(control))  / pooled StDev
-
-            If is_paired is True, returns
+            If is_paired is not None, returns
             (numpy.mean(test) - numpy.mean(control))  / average StDev
 
             The pooled standard deviation is equal to:
@@ -290,7 +289,7 @@ def cohens_h(control, test):
 
     
 
-def hedges_g(control, test, is_paired=False):
+def hedges_g(control, test, is_paired=None):
     """
     Computes Hedges' g for  for test v.s. control.
     It first computes Cohen's d, then calulates a correction factor based on
diff --git a/dabest/tests/test_10_proportion_plot.py b/dabest/tests/test_10_proportion_plot.py
@@ -12,17 +12,10 @@
 
 two_groups_unpaired = load(df, idx=("Control 1", "Test 1"), proportional=True)
 
-two_groups_paired   = load(df, idx=("Control 1", "Test 1"),
-                           paired="baseline", id_col="ID",proportional=True)
-
 multi_2group = load(df, idx=(("Control 1", "Test 1",),
                              ("Control 2", "Test 2")),
                     proportional=True)
 
-multi_2group_paired = load(df, idx=(("Control 1", "Test 1"),
-                                 ("Control 2", "Test 2")),
-                            paired="baseline", id_col="ID", proportional=True)
-
 shared_control = load(df, idx=("Control 1", "Test 1",
                                 "Test 2", "Test 3",
                                 "Test 4", "Test 5", "Test 6"),
@@ -31,8 +24,32 @@
 multi_groups = load(df, idx=(("Control 1", "Test 1",),
                              ("Control 2", "Test 2","Test 3"),
                              ("Control 3", "Test 4","Test 5", "Test 6")
-                             ),proportional=True
-                    )
+                             ),proportional=True)
+
+two_groups_paired   = load(df, idx=("Control 1", "Test 1"),
+                           paired="baseline", id_col="ID",proportional=True)
+
+multi_2group_paired = load(df, idx=(("Control 1", "Test 1"),
+                                 ("Control 2", "Test 2")),
+                            paired="baseline", id_col="ID", proportional=True)
+
+multi_groups_paired = load(df, idx=(("Control 1", "Test 1",),
+                             ("Control 2", "Test 2","Test 3"),
+                             ("Control 3", "Test 4","Test 5", "Test 6")
+                             ),paired="baseline", id_col="ID", proportional=True)
+
+two_groups_sequential   = load(df, idx=("Control 1", "Test 1"),
+                           paired="sequential", id_col="ID",proportional=True)
+
+multi_2group_sequential = load(df, idx=(("Control 1", "Test 1"),
+                                 ("Control 2", "Test 2")),
+                            paired="sequential", id_col="ID", proportional=True)
+
+multi_groups_sequential = load(df, idx=(("Control 1", "Test 1",),
+                             ("Control 2", "Test 2","Test 3"),
+                             ("Control 3", "Test 4","Test 5", "Test 6")
+                             ),paired="sequential", id_col="ID", proportional=True)
+
 
 @pytest.mark.mpl_image_compare
 def test_101_gardner_altman_unpaired_propdiff():