Skip to content

Commit 4d6ca8e

Browse files
committed
Update single-cell simulation
1 parent a4491c0 commit 4d6ca8e

4 files changed

Lines changed: 123 additions & 25 deletions

File tree

paper/simu_nb/Plot.ipynb

Lines changed: 6 additions & 9 deletions
Large diffs are not rendered by default.

paper/simu_nb/simu_nb.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ do
33
Rscript simu_nb_data.R ${isimu}
44
cp simu_nb_data.R data/simu_100${isimu}/
55
cp simu_nb_fit.R data/simu_100${isimu}/
6-
Rscript --no-save --no-restore --verbose simu_nb_fit.R ${isimu} &> out${isimu}_fit.txt
7-
mv out${isimu}_fit.txt results/
8-
6+
for seed in $(seq 0 49);
7+
do
8+
Rscript --no-save --no-restore --verbose simu_nb_fit.R ${isimu} ${seed} &> out${isimu}_fit_${seed}.txt
9+
mv out${isimu}_fit_${seed}.txt results/
10+
done
911
python simu_nb_plot.py ${isimu} &> out${isimu}_plot.txt
1012
mv out${isimu}_plot.txt results/
11-
done
13+
done

paper/simu_nb/simu_nb_fit.R

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
library(rhdf5)
2+
3+
24
# Load Python and R functions for basic Wilcoxon Tests, DESeq2, and CocoA-Diff
35
# and causarray, cinemaot
46
require(reticulate)
@@ -15,8 +17,8 @@ n <- 200
1517
c = 0.1
1618
alpha = 0.1
1719

18-
ind <- '_d_1_r_4_noise_0.1'
19-
num_r <- 1
20+
ind <- '_d_1_r_4_noise_0.2'
21+
num_r <- 1; d <- 1
2022

2123
args = commandArgs(trailingOnly=TRUE)
2224
if(length(args)==0){
@@ -38,17 +40,15 @@ if((ind != '') && grepl( 'r', ind, fixed = TRUE)){
3840
}
3941

4042

41-
43+
seeds <- as.integer(args[2])
4244
for(n in c(100, 500, 1000, 5000)){
4345
path_result <- sprintf(paste0(path_base,'results/simu_%d%s/'), n, ind)
4446
dir.create(path_result, recursive=TRUE, showWarnings = FALSE)
4547

46-
for (seed in 0:49) {
48+
for (seed in c(seeds)) {
4749
path_data <- sprintf(paste0(path_base,'data/simu_%d%s/simu_data_%d.h5'), n, ind, seed)
4850

49-
if(file.exists(sprintf('%scausarray_r_%d_%d.csv', path_result, 6, seed))){
50-
next
51-
}
51+
cat(n, seed, '...\n')
5252

5353
Y <- t(h5read(path_data, '/Y'))
5454
metadata <- t(h5read(path_data, '/metadata'))
@@ -72,12 +72,13 @@ for(n in c(100, 500, 1000, 5000)){
7272
possibleError <- tryCatch(
7373
{
7474
# Wilcoxon Tests ----
75-
res.wilc <- run_wilcoxon(Y, metadata=scaleW, raw=T)
75+
res.wilc <- run_wilcoxon(Y, metadata=scaleW)
7676

7777
# DESeq ----
7878
res.DESeq <- run_DESeq(Y, metadata=scaleW, return_ds=TRUE)
7979
dds <- res.DESeq[[2]]
8080
res.DESeq <- res.DESeq[[1]]
81+
# causarray$comp_stat(theta, res.DESeq$padj<0.1, 0.1)
8182

8283
# CocoA-Diff ----
8384
cocoa <- run_cocoa(sc = Y, indvs=metadata[,2], metadata=scaleW, cocoAWriteName=sprintf('%stmp_cocoa/tmp_%d', path_result, seed))
@@ -97,20 +98,34 @@ for(n in c(100, 500, 1000, 5000)){
9798
W.cinemaotw <- res.cinemaotw[[2]]$W
9899
res.cinemaotw <- res.cinemaotw[[1]]
99100

101+
100102
# Save confounder estimation results
101103
write.csv(cf.cocoa, sprintf('%scocoa_cf_%d.csv', path_result, seed))
102104
write.csv(cf.cinemaot, sprintf('%scinemaot_cf_%d.csv', path_result, seed))
103105
write.csv(W.cinemaot, sprintf('%scinemaot_W_%d.csv', path_result, seed))
104106
write.csv(cf.cinemaotw, sprintf('%scinemaotw_cf_%d.csv', path_result, seed))
105107
write.csv(W.cinemaotw, sprintf('%scinemaotw_W_%d.csv', path_result, seed))
106108

109+
107110
# Save test results
108111
write.csv(res.wilc, sprintf('%swilc_%d.csv', path_result, seed))
109112
write.csv(res.DESeq, sprintf('%sDESeq_%d.csv', path_result, seed))
110113
write.csv(res.cocoa, sprintf('%scocoa_%d.csv', path_result, seed))
111114
write.csv(res.cinemaot, sprintf('%scinemaot_%d.csv', path_result, seed))
112115
write.csv(res.cinemaotw, sprintf('%scinemaotw_%d.csv', path_result, seed))
113116

117+
118+
# mixscape ----
119+
res.mixscape <- run_mixscape(Y, A, raw=TRUE)
120+
cf.mixscape <- res.mixscape[[2]]$Y_hat_0
121+
W.mixscape <- res.mixscape[[2]]$W
122+
res.mixscape <- res.mixscape[[1]]
123+
124+
write.csv(cf.mixscape, sprintf('%smixscape_cf_%d.csv', path_result, seed))
125+
write.csv(W.mixscape, sprintf('%smixscape_W_%d.csv', path_result, seed))
126+
write.csv(res.mixscape, sprintf('%smixscape_%d.csv', path_result, seed))
127+
128+
114129
for(r_hat in c(2,4,6)){
115130
# RUV
116131
ruv <- run_ruv(Y, metadata=scaleW, r_hat)
@@ -140,7 +155,7 @@ for(n in c(100, 500, 1000, 5000)){
140155

141156
res.causarray <- run_causarray(Y, scaleW[,-ncol(scaleW)], A,
142157
fdx=T, r=r_hat, glm_alpha=.5, shrinkage=T)
143-
cf.causarray <- log1p(res.causarray[[2]]$Y_hat_0)
158+
cf.causarray <- log1p(res.causarray[[2]]$Y_hat[,,1,1])
144159
W.causarray <- res.causarray[[2]]$W
145160
res.causarray <- res.causarray[[1]]
146161

paper/simu_nb/simu_nb_plot.py

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def legend_title_left(leg):
3131
else:
3232
ind = ''
3333
r_list = [2,4,6]
34-
method_list = ['wilc', 'DESeq', 'cocoa', 'cinemaot', 'cinemaotw'] \
34+
method_list = ['wilc', 'DESeq', 'cocoa', 'cinemaot', 'cinemaotw', 'mixscape'] \
3535
+ ['ruv_r_{}'.format(r) for r in r_list] \
3636
+ ['ruv3nb_r_{}'.format(r) for r in r_list] \
3737
+ ['causarray_r_{}'.format(r) for r in r_list]
@@ -83,6 +83,27 @@ def legend_title_left(leg):
8383
df_res.reset_index(drop=True, inplace=True)
8484
df_res.to_csv(path_base+'results/result{}_test.csv'.format(ind))
8585

86+
87+
sns.set(font_scale=1.2)
88+
fig, axes = plt.subplots(1,2, figsize=(10,4), sharex=True, sharey=False)
89+
for j, metric in enumerate(['FDR', 'power']):
90+
sns.boxplot(data=df_res, x='n', y=metric, hue='method', ax=axes[j])
91+
92+
axes[0].axhline(alpha, color='r', linestyle='--')
93+
lines_labels = [ax.get_legend_handles_labels() for ax in [axes[1]]]
94+
handles, labels = [sum(lol, []) for lol in zip(*lines_labels)]
95+
96+
axes[0].get_legend().remove()
97+
axes[1].get_legend().remove()
98+
legend = fig.legend(handles=handles, labels=labels,
99+
loc=9, ncol=5, title=None, frameon=False)
100+
legend_title_left(legend)
101+
102+
fig.tight_layout()
103+
fig.subplots_adjust(top=0.85)
104+
105+
plt.savefig(path_base+'results/simu{}_res.pdf'.format(ind), bbox_inches='tight', pad_inches=0, dpi=300)
106+
86107
print(df_res.groupby(['n','method'])[['typeI_err', 'FDR', 'power', 'FDX', 'num_dis']].median())
87108

88109

@@ -93,7 +114,7 @@ def legend_title_left(leg):
93114

94115

95116
r_list = [2,4,6]
96-
method_list = ['cocoa', 'cinemaot', 'cinemaotw'] \
117+
method_list = ['cocoa', 'cinemaot', 'cinemaotw', 'mixscape'] \
97118
+ ['ruv_r_{}'.format(r) for r in r_list] \
98119
+ ['ruv3nb_r_{}'.format(r) for r in r_list] \
99120
+ ['causarray_r_{}'.format(r) for r in r_list]
@@ -146,4 +167,67 @@ def legend_title_left(leg):
146167
df_res = pd.concat([df_res, df], axis=0)
147168

148169
df_res.reset_index(drop=True, inplace=True)
149-
df_res.to_csv(path_base+'results/result{}_deconfound.csv'.format(ind))
170+
df_res.to_csv(path_base+'results/result{}_deconfound.csv'.format(ind))
171+
172+
173+
174+
175+
176+
df_test = pd.read_csv(path_base+'results/result{}_test.csv'.format(ind)).rename({'FDR':'FPR', 'power':'TPR'}, axis=1)
177+
df_cf = pd.read_csv(path_base+'results/result{}_deconfound.csv'.format(ind))
178+
179+
r_list = [2,4,6]
180+
method_name = {
181+
'wilc':'Wilcoxon', 'DESeq':'DESeq2', 'cocoa':'CoCoA', 'cinemaot':'CINEMA-OT', 'cinemaotw':'CINEMA-OT-W', 'mixscape':'Mixscape',
182+
}
183+
184+
method_name = reduce(lambda a, b: dict(a, **b),
185+
[{'ruv_r_{}'.format(r):'RUV $r={}$'.format(r) for r in r_list},
186+
{'ruv3nb_r_{}'.format(r):'RUV-III-NB $r={}$'.format(r) for r in r_list},
187+
{'causarray_r_{}'.format(r):'causarray $r={}$'.format(r) for r in r_list}
188+
])
189+
190+
df_test = df_test[df_test['method'].isin(method_name.keys())]
191+
df_cf = df_cf[df_cf['method'].isin(method_name.keys())]
192+
df_test['method'] = df_test['method'].map(method_name)
193+
df_cf['method'] = df_cf['method'].map(method_name)
194+
195+
df_test = df_test[df_test['n'].isin(n_list)]
196+
df_cf = df_cf[df_cf['n'].isin(n_list)]
197+
198+
method_list = method_name.values()#df_test['method'].unique()
199+
# palette = sns.color_palette()[:len(method_list)]
200+
palette = reduce(lambda l1, l2: l1+l2, [sns.color_palette(name)[:len(r_list)*2:2] for name in ['Reds', 'Greens', 'Blues']])
201+
hue_order = {i:c for i,c in zip(method_list, palette) }
202+
203+
sns.set(font_scale=1.3)
204+
fig, axes = plt.subplots(1,4, figsize=(16,5), sharex=False, sharey=False)
205+
for j, metric in enumerate(['FPR', 'TPR']):
206+
sns.boxplot(data=df_test, x='n', y=metric, hue='method', hue_order=hue_order,
207+
ax=axes[j+2], palette=palette, showfliers=False)
208+
209+
for j, metric in enumerate(['ARI', 'ASW']):
210+
sns.boxplot(data=df_cf, x='n', y=metric, hue='method', hue_order=hue_order,
211+
ax=axes[j], palette=palette, showfliers=False)
212+
213+
axes[2].axhline(0.1, color='r', linestyle='--')
214+
lines_labels = [ax.get_legend_handles_labels() for ax in [axes[1]]]
215+
handles, labels = [sum(lol, []) for lol in zip(*lines_labels)]
216+
handles = [mlines.Line2D([], [], linestyle='None')] * 3 + handles[::3] + handles[1::3] + handles[2::3]
217+
labels = ['RUV', 'RUV-III-NB', 'causarray',
218+
'$r=2$', '$r=2$', '$r=2$',
219+
'$r=4$', '$r=4$', '$r=4$',
220+
'$r=6$', '$r=6$', '$r=6$']
221+
222+
for j in range(4):
223+
axes[j].get_legend().remove()
224+
axes[j].tick_params(axis='both', which='major', labelsize=10)
225+
axes[j].set_xlabel('Sample size $n$')
226+
legend = fig.legend(handles=handles, labels=labels,
227+
loc=9, ncol=4, title=None, frameon=False)
228+
legend_title_left(legend)
229+
230+
fig.tight_layout()
231+
fig.subplots_adjust(top=0.78)
232+
233+
plt.savefig(path_base + 'results/simu_nb_r{}.pdf'.format(ind), bbox_inches='tight', pad_inches=0, dpi=300)

0 commit comments

Comments
 (0)