Skip to content

Commit a4491c0

Browse files
committed
Update pseudo-bulk simulation
1 parent 18a4599 commit a4491c0

4 files changed

Lines changed: 59 additions & 39 deletions

File tree

paper/simu_poi/Plot.ipynb

Lines changed: 22 additions & 6 deletions
Large diffs are not rendered by default.

paper/simu_poi/simu_poi_data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def generate_data(
8686

8787
if d > 0:
8888
# Generate covariates and treatment assignment
89-
_W = np.random.normal(size=(n_samples,d)) * 0.5#noise
89+
_W = np.random.normal(size=(n_samples,d)) * 0.5
9090
_W[:,-r:] *= noise
9191
_W = (_W + ct_mean) * ct_scale
9292
W = np.c_[W, _W]
@@ -158,7 +158,7 @@ def generate_data(
158158
psi = 0.1 # zero-inflation probability
159159

160160
# Save data to HDF5 file
161-
for n in range(100,320,100):
161+
for n in [100, 500, 1000, 5000]:
162162
path_to_data = '/home/jinandmaya/simu_poi/data/simu_{}{}/'.format(n, ind)
163163
os.makedirs(path_to_data, exist_ok=True)
164164
for seed in tqdm(range(50)):

paper/simu_poi/simu_poi_fit.R

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
library(rhdf5)
2+
23
# Load Python and R functions for basic Wilcoxon Tests, DESeq2, and CocoA-Diff
34
# and causarray, cinemaot
45
require(reticulate)
@@ -16,8 +17,9 @@ p <- 2000
1617
c = 0.1
1718
alpha = 0.1
1819

19-
ind <- '_d_0_r_4_noise_1.0'
20+
ind <- '_d_2_r_1_noise_0.5'
2021
num_r <- 1
22+
noise <- 1.
2123

2224
args = commandArgs(trailingOnly=TRUE)
2325
if(length(args)==0){
@@ -35,13 +37,12 @@ if((ind != '') && grepl( 'r', ind, fixed = TRUE)){
3537

3638
if((ind != '') && grepl( 'noise', ind, fixed = TRUE)){
3739
library(stringr)
38-
noise <- as.double(str_extract(ind, "(?<=noise_)[0-9]*"))
40+
noise <- as.double(str_extract(ind, "(?<=noise_)[0-9]+(\\.[0-9]+)?"))
3941
}else{
40-
noise <- 1.
42+
noise <- 1
4143
}
4244

43-
44-
for(n in c(100,500,1000,5000)){
45+
for(n in c(100, 500, 1000, 5000)){
4546

4647
path_result <- sprintf(paste0(path_base,'results/simu_%d%s/'), n, ind)
4748
dir.create(path_result, recursive=TRUE, showWarnings = FALSE)
@@ -75,7 +76,7 @@ for(n in c(100,500,1000,5000)){
7576
possibleError <- tryCatch(
7677
{
7778
# Wilcoxon Tests ----
78-
res.wilc <- run_wilcoxon(Y, metadata=scaleW, raw=T)
79+
res.wilc <- run_wilcoxon(Y, metadata=scaleW)
7980

8081
# # DESeq ----
8182
res.DESeq <- run_DESeq(Y, metadata=scaleW, return_ds=TRUE)
@@ -87,7 +88,6 @@ for(n in c(100,500,1000,5000)){
8788
cocoa <- run_cocoa(sc = Y_sc, indvs=metadata[,2], metadata=scaleW, cocoAWriteName=sprintf('%stmp_cocoa/tmp_%d', path_result, seed))
8889
res.cocoa <- cocoa[[1]]
8990
cf.cocoa <- t(cocoa[[2]]$cf.ln.mu)
90-
9191

9292
# CINEMA-OT ---- run_cinemaot returns list(df, CF, TE)
9393
res.cinemaot <- run_cinemaot(Y, A, raw=TRUE)
@@ -100,8 +100,6 @@ for(n in c(100,500,1000,5000)){
100100
W.cinemaotw <- res.cinemaotw[[2]]$W
101101
res.cinemaotw <- res.cinemaotw[[1]]
102102

103-
104-
105103
# Save confounder estimation results
106104
write.csv(cf.cocoa, sprintf('%scocoa_cf_%d.csv', path_result, seed))
107105
write.csv(cf.cinemaot, sprintf('%scinemaot_cf_%d.csv', path_result, seed))
@@ -116,7 +114,15 @@ for(n in c(100,500,1000,5000)){
116114
write.csv(res.cinemaot, sprintf('%scinemaot_%d.csv', path_result, seed))
117115
write.csv(res.cinemaotw, sprintf('%scinemaotw_%d.csv', path_result, seed))
118116

117+
res.mixscape <- run_mixscape(Y, A, raw=TRUE)
118+
cf.mixscape <- res.mixscape[[2]]$Y_hat_0
119+
W.mixscape <- res.mixscape[[2]]$W
120+
res.mixscape <- res.mixscape[[1]]
119121

122+
write.csv(cf.mixscape, sprintf('%smixscape_cf_%d.csv', path_result, seed))
123+
write.csv(W.mixscape, sprintf('%smixscape_W_%d.csv', path_result, seed))
124+
write.csv(res.mixscape, sprintf('%smixscape_%d.csv', path_result, seed))
125+
120126
for(r_hat in c(2,4,6)){
121127
# RUV
122128
ruv <- run_ruv(Y, metadata=scaleW, r_hat)
@@ -136,39 +142,40 @@ for(n in c(100,500,1000,5000)){
136142
write.csv(cf.ruv3nb, sprintf('%sruv3nb_r_%d_cf_%d.csv', path_result, r_hat, seed))
137143
write.csv(W.ruv3nb, sprintf('%sruv3nb_r_%d_W_%d.csv', path_result, r_hat, seed))
138144
write.csv(res.ruv3nb, sprintf('%sruv3nb_r_%d_%d.csv', path_result, r_hat, seed))
139-
}
145+
}
140146

141-
# causarray
147+
# # causarray
142148
# res.causarray.r <- estimate_r_causarray(Y, scaleW[,-ncol(scaleW)], A, 5)
143149
# r_hat <- res.causarray.r[which.min(res.causarray.r$JIC), 'r']
144150
# cat('r_hat:', r_hat, '\n')
145151
# r_hat <- num_r
146152
# res.causarray <- run_causarray(Y, scaleW[,-ncol(scaleW)], A,
147-
# alpha=alpha, c=c, r=r_hat, #B=2000,
148-
# # Y_hat_0=res[[1]], Y_hat_1=res[[2]],
149-
# # ps_model='logistic', penalty=NULL,
150-
# # max_depth=as.integer(5), func='LFC'
153+
# alpha=alpha, c=c, r=r_hat
151154
# )[[1]]
152155

153156
res.causarray <- run_causarray(Y, scaleW[,-ncol(scaleW)], A,
154-
fdx=T, r=r_hat, glm_alpha=.5, shrinkage=T, usevar = 'unequal')
155-
cf.causarray <- log1p(res.causarray[[2]]$Y_hat_0)
157+
fdx=T, r=r_hat, glm_alpha=0.5, shrinkage=T, usevar = 'unequal'
158+
)
159+
160+
cf.causarray <- log1p(res.causarray[[2]]$Y_hat[,,1,1])
156161
W.causarray <- res.causarray[[2]]$W
157162
res.causarray <- res.causarray[[1]]
158-
163+
cat(sum(res.causarray$padj<0.1))
164+
159165
# Save confounder estimation results
160166
write.csv(cf.ruv, sprintf('%sruv_r_%d_cf_%d.csv', path_result, r_hat, seed))
161167
write.csv(W.ruv, sprintf('%sruv_r_%d_W_%d.csv', path_result, r_hat, seed))
162-
168+
write.csv(res.ruv, sprintf('%sruv_r_%d_%d.csv', path_result, r_hat, seed))
169+
163170
write.csv(cf.causarray, sprintf('%scausarray_r_%d_cf_%d.csv', path_result, r_hat, seed))
164171
write.csv(W.causarray, sprintf('%scausarray_r_%d_W_%d.csv', path_result, r_hat, seed))
165-
166-
write.csv(res.ruv, sprintf('%sruv_r_%d_%d.csv', path_result, r_hat, seed))
167172
write.csv(res.causarray, sprintf('%scausarray_r_%d_%d.csv', path_result, r_hat, seed))
168173
}
169174

170175
},
171176
error=function(e) e
172177
)
173178
}
174-
}
179+
}
180+
181+
print('Done')

paper/simu_poi/simu_poi_plot.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def legend_title_left(leg):
3131
else:
3232
ind = ''
3333
r_list = [2,4,6]
34-
method_list = ['wilc', 'DESeq', 'cocoa', 'cinemaot', 'cinemaotw'] \
34+
method_list = ['wilc', 'DESeq', 'cocoa', 'cinemaot', 'cinemaotw', 'mixscape'] \
3535
+ ['ruv_r_{}'.format(r) for r in r_list] \
3636
+ ['ruv3nb_r_{}'.format(r) for r in r_list] \
3737
+ ['causarray_r_{}'.format(r) for r in r_list]
@@ -85,7 +85,7 @@ def legend_title_left(leg):
8585

8686

8787

88-
method_list = ['cocoa', 'cinemaot', 'cinemaotw'] \
88+
method_list = ['cocoa', 'cinemaot', 'cinemaotw', 'mixscape'] \
8989
+ ['ruv_r_{}'.format(r) for r in r_list] \
9090
+ ['ruv3nb_r_{}'.format(r) for r in r_list] \
9191
+ ['causarray_r_{}'.format(r) for r in r_list]
@@ -108,7 +108,6 @@ def legend_title_left(leg):
108108
Y = np.array(f['Y'], dtype='float') / np.exp(W[:,:-r] @ B[:-r,:])
109109
Z = W[:,-1:]
110110
metadata = np.array(f['metadata'], dtype='float')
111-
112111
celltype = metadata[np.unique(metadata[:,1], return_index=True)[1],-1].astype(int)
113112
print(seed)
114113

@@ -126,7 +125,7 @@ def legend_title_left(leg):
126125
if not os.path.exists(filename):
127126
print(method, 'cf')
128127
break
129-
CF = pd.read_csv(filename, index_col=0).values
128+
CF = pd.read_csv(filename, index_col=0).values.astype(float)
130129
res = comp_score(Y, CF, celltype, Z, W_hat)
131130
res = np.r_[[method, seed], res]
132131

@@ -152,10 +151,9 @@ def legend_title_left(leg):
152151
df_cf = pd.read_csv(path_base+'results/result{}_deconfound.csv'.format(ind))
153152
r_list = [2]
154153

155-
method_name = {'wilc':'Wilcoxon', 'DESeq':'DESeq2', 'cocoa':'CoCoA', 'cinemaot':'CINEMA-OT', 'cinemaotw':'CINEMA-OT-W',
154+
method_name = {'wilc':'Wilcoxon', 'DESeq':'DESeq2', 'cocoa':'CoCoA', 'cinemaot':'CINEMA-OT', 'cinemaotw':'CINEMA-OT-W', 'mixscape':'Mixscape',
156155
}
157-
158-
156+
159157
method_name.update(
160158
reduce(lambda a, b: dict(a, **b),
161159
[{'ruv_r_{}'.format(r):'RUV' for r in r_list},
@@ -164,7 +162,6 @@ def legend_title_left(leg):
164162
])
165163
)
166164

167-
168165
df_test = df_test[df_test['method'].isin(method_name.keys())]
169166
df_cf = df_cf[df_cf['method'].isin(method_name.keys())]
170167
df_test['method'] = df_test['method'].map(method_name)
@@ -185,7 +182,7 @@ def legend_title_left(leg):
185182

186183
for j, metric in enumerate(['ARI', 'ASW']):
187184
sns.barplot(data=df_cf, x='n', y=metric, hue='method', hue_order=hue_order,
188-
ax=axes[j], palette=palette)#, showfliers=False)
185+
ax=axes[j], palette=palette)
189186

190187
axes[2].axhline(0.1, color='r', linestyle='--')
191188
lines_labels = [ax.get_legend_handles_labels() for ax in [axes[1]]]

0 commit comments

Comments
 (0)