Skip to content

Commit 9d9a554

Browse files
committed
Intermediate stage
Known remaining problems: (1) the contrast axes ticks; (2) two plots showing together; (3) xtick labels are not aligned in sequential paired plots.
1 parent c823fab commit 9d9a554

3 files changed

Lines changed: 46 additions & 28 deletions

File tree

dabest/_classes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2785,7 +2785,7 @@ def plot(self, color_col=None,
27852785
paired proportional data when `show_pairs=True` and `proportional=True`.
27862786
Pass any keyword arguments accepted by plot_tools.sankeydiag() function
27872787
here, as a dict. If None, the following keywords are passed to sankey diagram:
2788-
{"width": 0.5, "align": "center", "alpha": 0.65, "rightColor": False}
2788+
{"width": 0.5, "align": "center", "alpha": 0.4, "bar_width": 0.1, "rightColor": False}
27892789
reflines_kwargs : dict, default None
27902790
This will change the appearance of the zero reference lines. Pass
27912791
any keyword arguments accepted by the matplotlib Axes `hlines`

dabest/plot_tools.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ def check_data_matches_labels(labels, data, side):
418418

419419
def single_sankey(left, right, xpos=0, leftWeight=None, rightWeight=None,
420420
colorDict=None, leftLabels=None, rightLabels=None, ax=None,
421-
width=0.5, alpha=0.65, rightColor=False, align='center'):
421+
width=0.5, alpha=0.65, bar_width=0.1, rightColor=False, align='center'):
422422

423423
'''
424424
Make a single Sankey diagram showing proportion flow from left to right
@@ -489,17 +489,17 @@ def single_sankey(left, right, xpos=0, leftWeight=None, rightWeight=None,
489489
raise Exception('Sankey graph does not support null values.')
490490

491491
# Identify all labels that appear 'left' or 'right'
492-
allLabels = pd.Series(np.r_[dataFrame.left.unique(), dataFrame.right.unique()]).unique()
492+
allLabels = pd.Series(np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]).unique()
493493

494494
# Identify left labels
495495
if len(leftLabels) == 0:
496-
leftLabels = pd.Series(np.sort(dataFrame.left.unique())).unique()
496+
leftLabels = pd.Series(np.sort(dataFrame.left.unique())[::-1]).unique()
497497
else:
498498
check_data_matches_labels(leftLabels, dataFrame['left'], 'left')
499499

500500
# Identify right labels
501501
if len(rightLabels) == 0:
502-
rightLabels = pd.Series(np.sort(dataFrame.right.unique())).unique()
502+
rightLabels = pd.Series(np.sort(dataFrame.right.unique())[::-1]).unique()
503503
else:
504504
check_data_matches_labels(leftLabels, dataFrame['right'], 'right')
505505

@@ -602,15 +602,15 @@ def normalize_dict(nested_dict, target):
602602
# Plot vertical bars for each label
603603
for leftLabel in leftLabels:
604604
ax.fill_between(
605-
[leftpos + (-0.05 * xMax), leftpos],
605+
[leftpos + (-(bar_width) * xMax), leftpos],
606606
2 * [leftWidths_norm[leftLabel]["bottom"]],
607607
2 * [leftWidths_norm[leftLabel]["bottom"] + leftWidths_norm[leftLabel]["left"]],
608608
color=colorDict[leftLabel],
609609
alpha=0.99,
610610
)
611611
for rightLabel in rightLabels:
612612
ax.fill_between(
613-
[xMax + leftpos, leftpos + (1.05 * xMax)],
613+
[xMax + leftpos, leftpos + ((1 + bar_width) * xMax)],
614614
2 * [rightWidths_norm[rightLabel]['bottom']],
615615
2 * [rightWidths_norm[rightLabel]['bottom'] + rightWidths_norm[rightLabel]['right']],
616616
color=colorDict[rightLabel],
@@ -698,11 +698,14 @@ def sankeydiag(data, xvar, yvar, left_idx, right_idx,
698698

699699
if "rightColor" in kwargs:
700700
rightColor = kwargs["rightColor"]
701+
702+
if "bar_width" in kwargs:
703+
bar_width = kwargs["bar_width"]
701704

702705
if ax is None:
703706
fig, ax = plt.subplots()
704707

705-
allLabels = data[yvar].unique()
708+
allLabels = pd.Series(np.sort(data[yvar].unique())[::-1]).unique()
706709

707710
# Check if all the elements in left_idx and right_idx are in xvar column
708711
if not all(elem in data[xvar].unique() for elem in left_idx):
@@ -733,12 +736,14 @@ def sankeydiag(data, xvar, yvar, left_idx, right_idx,
733736
colorPalette = sns.color_palette(palette, len(allLabels))
734737
for i, label in enumerate(allLabels):
735738
plot_palette[label] = colorPalette[i]
739+
else:
740+
plot_palette = None
736741

737742
for left, right in zip(broadcasted_left, right_idx):
738743
single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar],
739744
xpos=xpos, ax=ax, colorDict=plot_palette, width=width,
740745
leftLabels=leftLabels, rightLabels=rightLabels,
741-
rightColor=rightColor,
746+
rightColor=rightColor, bar_width=bar_width,
742747
align=align, alpha=alpha)
743748
xpos += 1
744749

dabest/plotter.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
128128

129129
# Sankey Diagram kwargs
130130
default_sankey_kwargs = {"width": 0.5, "align": "center",
131-
"alpha": 0.65, "rightColor": False}
131+
"alpha": 0.4, "rightColor": False,
132+
"bar_width":0.1}
132133
if plot_kwargs["sankey_kwargs"] is None:
133134
sankey_kwargs = default_sankey_kwargs
134135
else:
@@ -253,7 +254,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
253254
# For Sankey Diagram plot, no need to worry about the color, each bar will have the same two colors
254255
# default color palette will be set to "hls"
255256
plot_palette_sankey = None
256-
257+
257258
else:
258259
swarm_colors = [sns.desaturate(c, swarm_desat) for c in unsat_colors]
259260
plot_palette_raw = dict(zip(names, swarm_colors))
@@ -388,16 +389,9 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
388389
temp_all_plot_groups = []
389390
for i in temp_idx:
390391
temp_all_plot_groups.extend(list(i))
391-
# TODO - Figure out how to draw sankey diagram for baseline paired.
392-
# sankey_control_group = [all_plot_groups[0]]
393-
# sankey_test_group = all_plot_groups.copy()
394-
# sankey_test_group.pop(0)
395392
else:
396393
temp_idx = idx
397394
temp_all_plot_groups = all_plot_groups
398-
sankey_control_group = [all_plot_groups[0]]
399-
sankey_test_group = all_plot_groups.copy()
400-
sankey_test_group.pop(0)
401395

402396
if proportional==False:
403397
# Plot the raw data as a slopegraph.
@@ -451,6 +445,17 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
451445
if err_color == None:
452446
err_color = "black"
453447

448+
if show_pairs is True:
449+
if is_paired == "baseline":
450+
sankey_control_group = []
451+
sankey_test_group = []
452+
for i in temp_idx:
453+
sankey_control_group.append(i[0])
454+
sankey_test_group.append(i[1])
455+
else:
456+
sankey_control_group = all_plot_groups[:-1]
457+
sankey_test_group = all_plot_groups[1:]
458+
454459
# Replace the paired proportional plot with sankey diagram
455460
sankey = sankeydiag(plot_data, xvar=xvar, yvar=yvar,
456461
left_idx=sankey_control_group,
@@ -562,17 +567,24 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
562567
rawdata_axes.legend().set_visible(False)
563568

564569

565-
#TODO: There is a bug for setting `is_paired` to be "baseline": Cannot achieve multiple tests vs. one control.
570+
#TODO: When `is_paired` is set to "baseline", the plot is shrunk together and the position of the contrast axes still needs to be fixed
566571
# Plot effect sizes and bootstraps.
567572
# Take note of where the `control` groups are.
568573
if is_paired == "baseline" and show_pairs == True:
569-
ticks_to_skip = np.arange(0, len(temp_all_plot_groups), 2).tolist()
570-
ticks_to_plot = np.arange(1, len(temp_all_plot_groups), 2).tolist()
571-
ticks_to_skip_contrast = np.cumsum([(len(t)-1)*2 for t in idx])[:-1].tolist()
572-
ticks_to_skip_contrast.insert(0, 0)
574+
if proportional == True:
575+
ticks_to_skip = []
576+
ticks_to_plot = np.arange(0, len(temp_all_plot_groups)/2).tolist()
577+
else:
578+
ticks_to_skip = np.arange(0, len(temp_all_plot_groups), 2).tolist()
579+
ticks_to_plot = np.arange(1, len(temp_all_plot_groups), 2).tolist()
580+
ticks_to_skip_contrast = np.cumsum([(len(t)-1)*2 for t in idx])[:-1].tolist()
581+
ticks_to_skip_contrast.insert(0, 0)
573582
else:
574-
ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()
575-
ticks_to_skip.insert(0, 0)
583+
if proportional == True:
584+
ticks_to_skip = [len(sankey_control_group)]
585+
else:
586+
ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()
587+
ticks_to_skip.insert(0, 0)
576588

577589
# Then obtain the ticks where we have to plot the effect sizes.
578590
ticks_to_plot = [t for t in range(0, len(all_plot_groups))
@@ -586,6 +598,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
586598
results = EffectSizeDataFrame.results
587599
contrast_xtick_labels = []
588600

601+
#TODO: Why is there always two plots showing together
602+
589603
#TODO: The contrast axes xticks is still to be fixed
590604
for j, tick in enumerate(ticks_to_plot):
591605
current_group = results.test[j]
@@ -694,9 +708,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):
694708
contrast_axes.set_xlim(rawdata_axes.get_xlim())
695709

696710
# Properly label the contrast ticks.
697-
if not (proportional==True and is_paired is not None):
698-
for t in ticks_to_skip:
699-
contrast_xtick_labels.insert(t, "")
711+
for t in ticks_to_skip:
712+
contrast_xtick_labels.insert(t, "")
700713
contrast_axes.set_xticklabels(contrast_xtick_labels)
701714

702715
if bootstraps_color_by_group is False:

0 commit comments

Comments
 (0)