Intermediate stage

Jacobluke- · Jacobluke- · commit 9d9a554672c3 · 2023-02-08T18:49:00.000+08:00
Known problems that remain:
1. The contrast axes ticks are not yet correct
2. Two plots are shown together
3. The xtick labels are NOT aligned in sequential paired plots
diff --git a/dabest/_classes.py b/dabest/_classes.py
@@ -2785,7 +2785,7 @@ def plot(self, color_col=None,
             paired proportional data when `show_pairs=True` and `proportional=True`. 
             Pass any keyword arguments accepted by plot_tools.sankeydiag() function
             here, as a dict. If None, the following keywords are passed to sankey diagram:
-            {"width": 0.5, "align": "center", "alpha": 0.65, "rightColor": False}
+            {"width": 0.5, "align": "center", "alpha": 0.4, "bar_width": 0.1, "rightColor": False}
         reflines_kwargs : dict, default None
             This will change the appearance of the zero reference lines. Pass
             any keyword arguments accepted by the matplotlib Axes `hlines`
diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py
@@ -418,7 +418,7 @@ def check_data_matches_labels(labels, data, side):
 
 def single_sankey(left, right, xpos=0, leftWeight=None, rightWeight=None, 
             colorDict=None, leftLabels=None, rightLabels=None, ax=None, 
-            width=0.5, alpha=0.65, rightColor=False, align='center'):
+            width=0.5, alpha=0.65, bar_width=0.1, rightColor=False, align='center'):
 
     '''
     Make a single Sankey diagram showing proportion flow from left to right
@@ -489,17 +489,17 @@ def single_sankey(left, right, xpos=0, leftWeight=None, rightWeight=None,
         raise Exception('Sankey graph does not support null values.')
 
     # Identify all labels that appear 'left' or 'right'
-    allLabels = pd.Series(np.r_[dataFrame.left.unique(), dataFrame.right.unique()]).unique()
+    allLabels = pd.Series(np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]).unique()
 
     # Identify left labels
     if len(leftLabels) == 0:
-        leftLabels = pd.Series(np.sort(dataFrame.left.unique())).unique()
+        leftLabels = pd.Series(np.sort(dataFrame.left.unique())[::-1]).unique()
     else:
         check_data_matches_labels(leftLabels, dataFrame['left'], 'left')
 
     # Identify right labels
     if len(rightLabels) == 0:
-        rightLabels = pd.Series(np.sort(dataFrame.right.unique())).unique()
+        rightLabels = pd.Series(np.sort(dataFrame.right.unique())[::-1]).unique()
     else:
         check_data_matches_labels(leftLabels, dataFrame['right'], 'right')
 
@@ -602,15 +602,15 @@ def normalize_dict(nested_dict, target):
     # Plot vertical bars for each label
     for leftLabel in leftLabels:
         ax.fill_between(
-            [leftpos + (-0.05 * xMax), leftpos],
+            [leftpos + (-(bar_width) * xMax), leftpos],
             2 * [leftWidths_norm[leftLabel]["bottom"]],
             2 * [leftWidths_norm[leftLabel]["bottom"] + leftWidths_norm[leftLabel]["left"]],
             color=colorDict[leftLabel],
             alpha=0.99,
         )
     for rightLabel in rightLabels:
         ax.fill_between(
-            [xMax + leftpos, leftpos + (1.05 * xMax)], 
+            [xMax + leftpos, leftpos + ((1 + bar_width) * xMax)], 
             2 * [rightWidths_norm[rightLabel]['bottom']],
             2 * [rightWidths_norm[rightLabel]['bottom'] + rightWidths_norm[rightLabel]['right']],
             color=colorDict[rightLabel],
@@ -698,11 +698,14 @@ def sankeydiag(data, xvar, yvar, left_idx, right_idx,
     
     if "rightColor" in kwargs:
         rightColor = kwargs["rightColor"]
+    
+    if "bar_width" in kwargs:
+        bar_width = kwargs["bar_width"]
 
     if ax is None:
         fig, ax = plt.subplots()
 
-    allLabels = data[yvar].unique()
+    allLabels = pd.Series(np.sort(data[yvar].unique())[::-1]).unique()
         
     # Check if all the elements in left_idx and right_idx are in xvar column
     if not all(elem in data[xvar].unique() for elem in left_idx):
@@ -733,12 +736,14 @@ def sankeydiag(data, xvar, yvar, left_idx, right_idx,
         colorPalette = sns.color_palette(palette, len(allLabels))
         for i, label in enumerate(allLabels):
             plot_palette[label] = colorPalette[i]
+    else:
+        plot_palette = None
 
     for left, right in zip(broadcasted_left, right_idx):
         single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], 
                         xpos=xpos, ax=ax, colorDict=plot_palette, width=width, 
                         leftLabels=leftLabels, rightLabels=rightLabels, 
-                        rightColor=rightColor, 
+                        rightColor=rightColor, bar_width=bar_width,
                         align=align, alpha=alpha)
         xpos += 1
 
diff --git a/dabest/plotter.py b/dabest/plotter.py
@@ -128,7 +128,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
 
     # Sankey Diagram kwargs
     default_sankey_kwargs = {"width": 0.5, "align": "center",
-                            "alpha": 0.65, "rightColor": False}
+                            "alpha": 0.4, "rightColor": False,
+                            "bar_width":0.1}
     if plot_kwargs["sankey_kwargs"] is None:
         sankey_kwargs = default_sankey_kwargs
     else:
@@ -253,7 +254,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
         # For Sankey Diagram plot, no need to worry about the color, each bar will have the same two colors
         # default color palette will be set to "hls"
         plot_palette_sankey = None
-        
+
     else:
         swarm_colors = [sns.desaturate(c, swarm_desat) for c in unsat_colors]
         plot_palette_raw = dict(zip(names, swarm_colors))
@@ -388,16 +389,9 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
             temp_all_plot_groups = []
             for i in temp_idx:
                 temp_all_plot_groups.extend(list(i))
-            # TODO - Figure out how to draw sankey diagram for baseline paired.
-            # sankey_control_group = [all_plot_groups[0]]
-            # sankey_test_group = all_plot_groups.copy()
-            # sankey_test_group.pop(0)
         else:
             temp_idx = idx
             temp_all_plot_groups = all_plot_groups
-            sankey_control_group = [all_plot_groups[0]]
-            sankey_test_group = all_plot_groups.copy()
-            sankey_test_group.pop(0)
 
         if proportional==False:
         # Plot the raw data as a slopegraph.
@@ -451,6 +445,17 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
             if err_color == None:
                 err_color = "black"
 
+            if show_pairs is True:
+                if is_paired == "baseline":
+                    sankey_control_group = []
+                    sankey_test_group = []
+                    for i in temp_idx:
+                        sankey_control_group.append(i[0])
+                        sankey_test_group.append(i[1])                   
+                else:
+                    sankey_control_group = all_plot_groups[:-1]
+                    sankey_test_group = all_plot_groups[1:]
+
             # Replace the paired proportional plot with sankey diagram
             sankey = sankeydiag(plot_data, xvar=xvar, yvar=yvar, 
                                 left_idx=sankey_control_group, 
@@ -562,17 +567,24 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
         rawdata_axes.legend().set_visible(False)
 
 
-    #TODO: There is a bug for setting `is_paired` to be "baseline": Cannot achieve multiple tests vs. one control.
+    #TODO: When setting 'baseline', the plot is shrunk together and the contrast axes position should be fixed
     # Plot effect sizes and bootstraps.
     # Take note of where the `control` groups are.
     if is_paired == "baseline" and show_pairs == True:
-        ticks_to_skip = np.arange(0, len(temp_all_plot_groups), 2).tolist()
-        ticks_to_plot = np.arange(1, len(temp_all_plot_groups), 2).tolist()
-        ticks_to_skip_contrast = np.cumsum([(len(t)-1)*2 for t in idx])[:-1].tolist()
-        ticks_to_skip_contrast.insert(0, 0)
+        if proportional == True:
+            ticks_to_skip = []
+            ticks_to_plot = np.arange(0, len(temp_all_plot_groups)/2).tolist()
+        else:
+            ticks_to_skip = np.arange(0, len(temp_all_plot_groups), 2).tolist()
+            ticks_to_plot = np.arange(1, len(temp_all_plot_groups), 2).tolist()
+            ticks_to_skip_contrast = np.cumsum([(len(t)-1)*2 for t in idx])[:-1].tolist()
+            ticks_to_skip_contrast.insert(0, 0)
     else:
-        ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()
-        ticks_to_skip.insert(0, 0)
+        if proportional == True:
+            ticks_to_skip = [len(sankey_control_group)]
+        else:
+            ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()
+            ticks_to_skip.insert(0, 0)
 
         # Then obtain the ticks where we have to plot the effect sizes.
         ticks_to_plot = [t for t in range(0, len(all_plot_groups))
@@ -586,6 +598,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
     results      = EffectSizeDataFrame.results
     contrast_xtick_labels = []
 
+    #TODO: Why are there always two plots showing together?
+
     #TODO: The contrast axes xticks is still to be fixed
     for j, tick in enumerate(ticks_to_plot):
         current_group     = results.test[j]
@@ -694,9 +708,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
         contrast_axes.set_xlim(rawdata_axes.get_xlim())
 
     # Properly label the contrast ticks.
-    if not (proportional==True and is_paired is not None):
-        for t in ticks_to_skip:
-            contrast_xtick_labels.insert(t, "")
+    for t in ticks_to_skip:
+        contrast_xtick_labels.insert(t, "")
     contrast_axes.set_xticklabels(contrast_xtick_labels)
 
     if bootstraps_color_by_group is False: