ACCLAB
diff --git a/‎LICENSE‎
Lines changed: 1 addition & 1 deletion b/‎LICENSE‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dabest/_classes.py‎
Lines changed: 1 addition & 1 deletion b/‎dabest/_classes.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dabest/plot_tools.py‎
Lines changed: 180 additions & 2 deletions b/‎dabest/plot_tools.py‎
Lines changed: 180 additions & 2 deletions
diff --git a/‎dabest/plotter.py‎
Lines changed: 43 additions & 26 deletions b/‎dabest/plotter.py‎
Lines changed: 43 additions & 26 deletions
diff --git a/‎…mings_multi_group_unpaired__propdiff.png‎ ‎…mmings_multi_group_unpaired_propdiff.png‎dabest/tests/baseline_images/test_105_cummings_multi_group_unpaired__propdiff.png renamed to dabest/tests/baseline_images/test_105_cummings_multi_group_unpaired_propdiff.png
31.6 KB b/‎…mings_multi_group_unpaired__propdiff.png‎ ‎…mmings_multi_group_unpaired_propdiff.png‎dabest/tests/baseline_images/test_105_cummings_multi_group_unpaired__propdiff.png renamed to dabest/tests/baseline_images/test_105_cummings_multi_group_unpaired_propdiff.png
31.6 KB
diff --git a/‎dabest/tests/baseline_images/test_123_sankey_gardner_altman.png‎
-29 Bytes b/‎dabest/tests/baseline_images/test_123_sankey_gardner_altman.png‎
-29 Bytes
diff --git a/‎dabest/tests/baseline_images/test_123_style_sheets.png‎
-31.3 KB b/‎dabest/tests/baseline_images/test_123_style_sheets.png‎
-31.3 KB
diff --git a/‎dabest/tests/baseline_images/test_124_sankey_cummings.png‎
17 Bytes b/‎dabest/tests/baseline_images/test_124_sankey_cummings.png‎
17 Bytes
diff --git a/‎dabest/tests/baseline_images/test_125_sankey_2paired_groups.png‎
-354 Bytes b/‎dabest/tests/baseline_images/test_125_sankey_2paired_groups.png‎
-354 Bytes
diff --git a/‎dabest/tests/baseline_images/test_126_sankey_2sequential_groups.png‎
-354 Bytes b/‎dabest/tests/baseline_images/test_126_sankey_2sequential_groups.png‎
-354 Bytes
@@ -1,6 +1,6 @@
 The Clear BSD License
 
-Copyright (c) 2016-2020 Joses W. Ho
+Copyright (c) 2016-2023 Joses W. Ho
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 
@@ -188,7 +188,7 @@ def __init__(self, data, idx, x, y, paired, id_col, ci,
                 raise ValueError(err0 + err1 + err2)
 
         else: # mix of string and tuple?
-            err = 'There seems to be a problem with the idx you'
+            err = 'There seems to be a problem with the idx you '\
             'entered--{}.'.format(idx)
             raise ValueError(err)
 
 
@@ -388,6 +388,174 @@ def proportion_error_bar(data, x, y, type='mean_sd', offset=0.2, ax=None,
         #                           [central_measure, central_measure], **kwargs)
         # ax.add_line(mean_line)
 
+def sankey_error_bar(data, x, y, type='mean_sd', offset=0.2, ax=None,
+                 line_color="black", gap_width_percent=1, pos=[0,1], 
+                 **kwargs):
+    '''
+    Function to plot the standard deviations for proportions as vertical
+    errorbars. The mean is a gap defined by negative space.
+
+    This is a specific design with the addition of parameter `pos`
+    for Sankey as each Sankey bar requires two errorbars, one for
+    the left and one for the right.
+
+    This style is inspired by Edward Tufte's redesign of the boxplot.
+    See The Visual Display of Quantitative Information (1983), pp.128-130.
+
+    Nothing is returned; the lines are added to `ax`.
+
+    Keywords
+    --------
+    data: pandas DataFrame.
+        This DataFrame should be in 'long' format.
+
+    x, y: string.
+        x and y columns to be plotted. `x` is the grouping column. The
+        spread of `y` within each group is computed as
+        sqrt(s * (n - s) / n**3), where s is the sum and n the count of `y`
+        in that group (presumably `y` holds 0/1 outcomes -- confirm against
+        callers).
+
+    type: ['mean_sd', 'median_quartiles'], default 'mean_sd'
+        Plots the summary statistics for each group. If 'mean_sd', then the
+        mean and the spread described above are plotted as a gapped line.
+        If 'median_quartiles', then the median and 25th and 75th percentiles
+        of each group are plotted instead.
+
+    offset: float (default 0.2) or iterable.
+        Give a single float (that will be used as the x-offset of all
+        gapped lines), or an iterable containing one x-offset per group.
+        Each offset is added to the matching entry of `pos`.
+
+    line_color: string (matplotlib color, default "black") or iterable of
+        matplotlib colors.
+
+        The color of the vertical line indicating the standard deviations.
+        An iterable must contain one color per group.
+
+    gap_width_percent: float, default 1
+        The size of the gap in the line (indicating the central measure),
+        expressed as a percentage of the y-span of the axes. Each half of
+        the line stops this far short of the central measure.
+
+    ax: matplotlib Axes object, default None
+        If a matplotlib Axes object is specified, the gapped lines will be
+        plotted in order on this axes. If None, the current axes (plt.gca())
+        is used.
+
+    pos: list of float, default [0, 1]
+        The base x-position of each group's gapped line, indexed in the
+        order the groups are plotted. Its length is not checked against
+        the number of groups.
+
+    kwargs: dict, default None
+        Dictionary with kwargs passed to matplotlib.lines.Line2D.
+        `clip_on` (False), `zorder` (5) and `lw` (2.) are filled in when
+        not supplied. `clip_on` is forced to True when a mean +/- spread
+        bound lies outside the current y-limits, and `color` is always
+        overwritten from `line_color`.
+
+    Raises
+    ------
+    ValueError
+        If `gap_width_percent` is outside [0, 100], or if the number of
+        colors in `line_color` or of offsets in `offset` differs from the
+        number of groups.
+    '''
+    import numpy as np
+    import pandas as pd
+    import matplotlib.pyplot as plt
+    import matplotlib.lines as mlines
+
+    if gap_width_percent < 0 or gap_width_percent > 100:
+        raise ValueError("`gap_width_percent` must be between 0 and 100.")
+
+    if ax is None:
+        ax = plt.gca()
+    # Convert the percentage into data units using the current y-limits.
+    ax_ylims = ax.get_ylim()
+    ax_yspan = np.abs(ax_ylims[1] - ax_ylims[0])
+    gap_width = ax_yspan * gap_width_percent / 100
+
+    # Fill in Line2D defaults only where the caller did not supply them.
+    keys = kwargs.keys()
+    if 'clip_on' not in keys:
+        kwargs['clip_on'] = False
+
+    if 'zorder' not in keys:
+        kwargs['zorder'] = 5
+
+    if 'lw' not in keys:
+        kwargs['lw'] = 2.
+
+    # # Grab the order in which the groups appear.
+    # group_order = pd.unique(data[x])
+
+    # Grab the order in which the groups appear,
+    # depending on whether the x-column is categorical.
+    if isinstance(data[x].dtype, pd.CategoricalDtype):
+        group_order = pd.unique(data[x]).categories
+    else:
+        group_order = pd.unique(data[x])
+
+    means = data.groupby(x)[y].mean().reindex(index=group_order)
+    # Per-group spread: sqrt(s * (n - s) / n**3), s = sum(y), n = len(y).
+    # NOTE(review): with p = s / n this equals sqrt(p * (1 - p) / n), which
+    # has the form of a standard error rather than a standard deviation --
+    # confirm which statistic is intended.
+    g = lambda x: np.sqrt((np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x)))
+    # NOTE(review): unlike `means`, `sd` is not reindexed to `group_order`;
+    # confirm that `means - sd` still lines up with `central_measures`
+    # when the groups do not appear in sorted order.
+    sd = data.groupby(x)[y].apply(g)
+    # sd = data.groupby(x)[y].std().reindex(index=group_order)
+    lower_sd = means - sd
+    upper_sd = means + sd
+
+    # Clip the lines to the axes if any bound falls outside the y-limits.
+    if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any():
+        kwargs['clip_on'] = True
+
+    medians = data.groupby(x)[y].median().reindex(index=group_order)
+    quantiles = data.groupby(x)[y].quantile([0.25, 0.75]) \
+        .unstack() \
+        .reindex(index=group_order)
+    lower_quartiles = quantiles[0.25]
+    upper_quartiles = quantiles[0.75]
+
+    # NOTE(review): there is no `else` branch, so any other `type` value
+    # leaves `central_measures`, `lows` and `highs` unassigned.
+    if type == 'mean_sd':
+        central_measures = means
+        lows = lower_sd
+        highs = upper_sd
+    elif type == 'median_quartiles':
+        central_measures = medians
+        lows = lower_quartiles
+        highs = upper_quartiles
+
+    n_groups = len(central_measures)
+
+    # A single color string is repeated for every group; an iterable must
+    # already contain exactly one color per group.
+    if isinstance(line_color, str):
+        custom_palette = np.repeat(line_color, n_groups)
+    else:
+        if len(line_color) != n_groups:
+            err1 = "{} groups are being plotted, but ".format(n_groups)
+            err2 = "{} colors(s) were supplied in `line_color`.".format(len(line_color))
+            raise ValueError(err1 + err2)
+        custom_palette = line_color
+
+    # A scalar offset has no len(); broadcast it to one offset per group.
+    try:
+        len_offset = len(offset)
+    except TypeError:
+        offset = np.repeat(offset, n_groups)
+        len_offset = len(offset)
+
+    if len_offset != n_groups:
+        err1 = "{} groups are being plotted, but ".format(n_groups)
+        err2 = "{} offset(s) were supplied in `offset`.".format(len_offset)
+        raise ValueError(err1 + err2)
+
+    # NOTE(review): self-assignment; this line has no effect.
+    kwargs['zorder'] = kwargs['zorder']
+
+    # `xpos` is the ordinal of the group within `central_measures`.
+    for xpos, central_measure in enumerate(central_measures):
+        # pick this group's line color.
+
+        kwargs['color'] = custom_palette[xpos]
+
+        _xpos = pos[xpos] + offset[xpos]
+        # add lower vertical span line.
+        # NOTE(review): integer key on a Series indexed by group; if the
+        # group labels are not integers this relies on positional
+        # fallback -- verify against the pandas version in use.
+        low = lows[xpos]
+        low_to_mean = mlines.Line2D([_xpos, _xpos],
+                                    [low, central_measure - gap_width],
+                                    **kwargs)
+        ax.add_line(low_to_mean)
+
+        # add upper vertical span line.
+        high = highs[xpos]
+        mean_to_high = mlines.Line2D([_xpos, _xpos],
+                                     [central_measure + gap_width, high],
+                                     **kwargs)
+        ax.add_line(mean_to_high)
+
+        # # add horizontal central measure line.
+        # kwargs['zorder'] = 6
+        # kwargs['color'] = gap_color
+        # kwargs['lw'] = kwargs['lw'] * 1.5
+        # line_xpos = xpos + offset[xpos]
+        # mean_line = mlines.Line2D([line_xpos-0.015, line_xpos+0.015],
+        #                           [central_measure, central_measure], **kwargs)
+        # ax.add_line(mean_line)
+
+
 def check_data_matches_labels(labels, data, side):
     '''
     Function to check that the labels and data match in the sankey diagram. 
@@ -418,7 +586,7 @@ def check_data_matches_labels(labels, data, side):
 
 def single_sankey(left, right, xpos=0, leftWeight=None, rightWeight=None, 
             colorDict=None, leftLabels=None, rightLabels=None, ax=None, 
-            width=0.5, alpha=0.65, bar_width=0.1, rightColor=False, align='center'):
+            width=0.5, alpha=0.65, bar_width=0.2, rightColor=False, align='center'):
 
     '''
     Make a single Sankey diagram showing proportion flow from left to right
@@ -535,6 +703,10 @@ def single_sankey(left, right, xpos=0, leftWeight=None, rightWeight=None,
     else: 
         leftpos = xpos
 
+    # Combine left and right arrays to have a pandas.DataFrame in the 'long' format
+    left_series = pd.Series(left, name='values').to_frame().assign(groups='left')
+    right_series = pd.Series(right, name='values').to_frame().assign(groups='right')
+    concatenated_df = pd.concat([left_series, right_series], ignore_index=True)
 
     # Determine positions of left label patches and total widths
     # We also want the height of the graph to be 1
@@ -623,6 +795,10 @@ def normalize_dict(nested_dict, target):
             color=colorDict[rightLabel],
             alpha=0.99
         )
+
+    # Plot error bars
+    sankey_error_bar(concatenated_df, x='groups', y='values', ax=ax, offset=0, gap_width_percent=2,
+                     pos=[(leftpos + (-(bar_width) * xMax) + leftpos)/2, (xMax + leftpos + leftpos + ((1 + bar_width) * xMax))/2],)
 
     # Plot strips
     for leftLabel in leftLabels:
@@ -654,7 +830,7 @@ def sankeydiag(data, xvar, yvar, left_idx, right_idx,
                 leftLabels=None, rightLabels=None,  
                 palette=None, ax=None, 
                 one_sankey=False,
-                width=0.5, rightColor=False,
+                width=0.4, rightColor=False,
                 align='center', alpha=0.65, **kwargs):
     '''
     Read in melted pd.DataFrame, and draw multiple sankey diagram on a single axes
@@ -666,6 +842,8 @@ def sankeydiag(data, xvar, yvar, left_idx, right_idx,
     --------
     data: pd.DataFrame
         input data, melted dataframe created by dabest.load()
+    xvar, yvar: string.
+        x and y columns to be plotted.
     left_idx: str
         the value in column xvar that is on the left side of each sankey diagram
     right_idx: str
 
@@ -41,6 +41,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
     import seaborn as sns
     import matplotlib.pyplot as plt
     import pandas as pd
+    import warnings
+    warnings.filterwarnings('ignore', 'This figure includes Axes that are not compatible with tight_layout')
 
     from .misc_tools import merge_two_dicts
     from .plot_tools import halfviolin, get_swarm_spans, gapped_lines, proportion_error_bar, sankeydiag
@@ -127,9 +129,9 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
                                          plot_kwargs["barplot_kwargs"])
 
     # Sankey Diagram kwargs
-    default_sankey_kwargs = {"width": 0.5, "align": "center",
+    default_sankey_kwargs = {"width": 0.4, "align": "center",
                             "alpha": 0.4, "rightColor": False,
-                            "bar_width":0.1}
+                            "bar_width":0.2}
     if plot_kwargs["sankey_kwargs"] is None:
         sankey_kwargs = default_sankey_kwargs
     else:
@@ -365,7 +367,6 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
         contrast_axes = axx[1]
     rawdata_axes.set_frame_on(False)
     contrast_axes.set_frame_on(False)
-    # fig.set_tight_layout(False)
 
     redraw_axes_kwargs = {'colors'     : ytick_color,
                           'facecolors' : ytick_color,
@@ -384,25 +385,33 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
 
     if show_pairs is True:
         if is_paired == "baseline":
-            temp_idx = []
-            for i in idx:
-                control = i[0]
-                temp_idx.extend(((control, test) for test in i[1:]))
-            temp_idx = tuple(temp_idx)
-
-            temp_all_plot_groups = []
-            for i in temp_idx:
-                temp_all_plot_groups.extend(list(i))
+            if proportional == False:
+                temp_idx = idx
+                temp_all_plot_groups = all_plot_groups
+            else:   
+                temp_idx = []
+                for i in idx:
+                    control = i[0]
+                    temp_idx.extend(((control, test) for test in i[1:]))
+                temp_idx = tuple(temp_idx)
+
+                temp_all_plot_groups = []
+                for i in temp_idx:
+                    temp_all_plot_groups.extend(list(i))
         else:
-            temp_idx = []
-            for i in idx:
-                for j in range(len(i)-1):
-                    control = i[j]
-                    test = i[j+1]
-                    temp_idx.append((control, test))
-            temp_all_plot_groups = []
-            for i in temp_idx:
-                temp_all_plot_groups.extend(list(i))
+            if proportional == False:
+                temp_idx = idx
+                temp_all_plot_groups = all_plot_groups
+            else:
+                temp_idx = []
+                for i in idx:
+                    for j in range(len(i)-1):
+                        control = i[j]
+                        test = i[j+1]
+                        temp_idx.append((control, test))
+                temp_all_plot_groups = []
+                for i in temp_idx:
+                    temp_all_plot_groups.extend(list(i))
         if proportional==False:
         # Plot the raw data as a slopegraph.
         # Pivot the long (melted) data.
@@ -445,9 +454,9 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
             # Set the tick labels, because the slopegraph plotting doesn't.
             rawdata_axes.set_xticks(np.arange(0, len(temp_all_plot_groups)))
             rawdata_axes.set_xticklabels(temp_all_plot_groups)
+            
         else:
             # Plot the raw data as a set of Sankey Diagrams aligned like barplot.
-
             group_summaries = plot_kwargs["group_summaries"]
             if group_summaries is None:
                 group_summaries = "mean_sd"
@@ -588,9 +597,14 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
             ticks_to_start_sankey.pop()
             ticks_to_start_sankey.insert(0, 0)
         else:
-            ticks_to_skip = np.arange(0, len(temp_all_plot_groups), 2).tolist()
-            ticks_to_plot = np.arange(1, len(temp_all_plot_groups), 2).tolist()
-            ticks_to_skip_contrast = np.cumsum([(len(t)-1)*2 for t in idx])[:-1].tolist()
+            # ticks_to_skip = np.arange(0, len(temp_all_plot_groups), 2).tolist()
+            # ticks_to_plot = np.arange(1, len(temp_all_plot_groups), 2).tolist()
+            ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()
+            ticks_to_skip.insert(0, 0)
+            # Then obtain the ticks where we have to plot the effect sizes.
+            ticks_to_plot = [t for t in range(0, len(all_plot_groups))
+                        if t not in ticks_to_skip]
+            ticks_to_skip_contrast = np.cumsum([(len(t)) for t in idx])[:-1].tolist()
             ticks_to_skip_contrast.insert(0, 0)
     else:
         if proportional == True and one_sankey == False:
@@ -974,7 +988,10 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
                 ax.set_ylim(ylim)
                 del redraw_axes_kwargs['y']
 
-            temp_length = [(len(i)-1)*2-1 for i in idx]
+            if proportional == False:
+                temp_length = [(len(i)-1) for i in idx]
+            else:
+                temp_length = [(len(i)-1)*2-1 for i in idx]
             if proportional == True and one_sankey == False:
                 rightend_ticks_contrast = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_sankey)
             else: