Skip to content

Commit ccb689e

Browse files
author
Sui Yue
authored
Merge pull request #458 from GoekeLab/Multiplex_Major_Patch
Merge Multiplex to singleExon_2
2 parents 051480c + b3fed7c commit ccb689e

10 files changed

Lines changed: 699 additions & 558 deletions

R/bambu-assignDist.R

Lines changed: 65 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -7,91 +7,102 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
77
returnDistTable = FALSE, trackReads = TRUE) {
88
if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
99
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
10-
readClassList = splitReadClassFiles(readClassList)
10+
readClassList <- splitReadClassFiles(readClassList)
1111
readClassDt <- genEquiRCs(metadata(readClassList)$readClassDist, annotations, verbose)
1212
readClassDt$eqClass.match = match(readClassDt$eqClassById,metadata(readClassList)$eqClassById)
1313
readClassDt <- simplifyNames(readClassDt)
14-
readClassDt = readClassDt %>% group_by(eqClassId, gene_sid) %>%
15-
mutate(multi_align = length(unique(txid))>1) %>% ungroup() %>% mutate(aval = 1) %>%
14+
readClassDt <- readClassDt %>% group_by(eqClassId, gene_sid) %>%
15+
mutate(multi_align = length(unique(txid))>1) %>%
16+
ungroup() %>%
17+
mutate(aval = 1) %>%
1618
data.table()
1719
#return non-em counts
18-
ColData = generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
20+
ColData <- generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
1921
quantData <- SummarizedExperiment(assays = SimpleList(
2022
counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
2123
rowRanges = annotations,
2224
colData = ColData)
23-
colnames(quantData) = ColData$id
25+
colnames(quantData) <- ColData$id
2426
if(sum(metadata(readClassList)$incompatibleCountMatrix)==0){
25-
metadata(quantData)$incompatibleCounts = NULL
26-
} else{
27-
metadata(quantData)$incompatibleCounts = generateIncompatibleCounts(metadata(readClassList)$incompatibleCountMatrix, annotations)
28-
}
29-
metadata(quantData)$nonuniqueCounts = generateNonUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)
30-
metadata(quantData)$readClassDt = readClassDt
31-
metadata(quantData)$countMatrix = metadata(readClassList)$countMatrix
32-
metadata(quantData)$incompatibleCountMatrix = metadata(readClassList)$incompatibleCountMatrix
33-
metadata(quantData)$sampleNames = metadata(readClassList)$sampleNames
34-
if(returnDistTable){
35-
metadata(quantData)$distTable = metadata(metadata(readClassList)$readClassDist)$distTableOld
27+
metadata(quantData)$incompatibleCounts <- NULL
28+
}else{
29+
metadata(quantData)$incompatibleCounts <- generateIncompatibleCounts(metadata(readClassList)$incompatibleCountMatrix, annotations)
3630
}
37-
if (trackReads){
38-
metadata(quantData)$readToTranscriptMap =
39-
generateReadToTranscriptMap(readClassList, metadata(readClassList)$readClassDist,
40-
annotations)
41-
}
31+
metadata(quantData)$nonuniqueCounts <- generateNonUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)
32+
metadata(quantData)$readClassDt <- readClassDt
33+
metadata(quantData)$countMatrix <- metadata(readClassList)$countMatrix
34+
metadata(quantData)$incompatibleCountMatrix <- metadata(readClassList)$incompatibleCountMatrix
35+
metadata(quantData)$sampleNames <- metadata(readClassList)$sampleNames
36+
if(returnDistTable)
37+
metadata(quantData)$distTable <- metadata(metadata(readClassList)$readClassDist)$distTableOld
38+
39+
if(trackReads)
40+
metadata(quantData)$readToTranscriptMap <-
41+
generateReadToTranscriptMap(readClassList,
42+
metadata(readClassList)$readClassDist,
43+
annotations)
44+
4245
return(quantData)
4346

4447
}
4548

46-
49+
#' Generate unique counts
50+
#' @noRd
4751
generateUniqueCounts <- function(readClassDt, countMatrix, annotations){
48-
x = readClassDt %>% filter(!multi_align & !is.na(eqClass.match))
49-
uniqueCounts = countMatrix[x$eqClass.match,]
50-
uniqueCounts.tx = sparse.model.matrix(~ factor(x$txid) - 1)
51-
uniqueCounts = t(uniqueCounts.tx) %*% uniqueCounts
52-
rownames(uniqueCounts) = names(annotations)[match(as.numeric(levels(factor(x$txid))),mcols(annotations)$txid)]
53-
counts = sparseMatrix(length(annotations), ncol(uniqueCounts), x = 0)
54-
rownames(counts) = names(annotations)
55-
counts[rownames(uniqueCounts),] = uniqueCounts
52+
x <- readClassDt %>% filter(!multi_align & !is.na(eqClass.match))
53+
uniqueCounts <- countMatrix[x$eqClass.match,]
54+
uniqueCounts.tx <- sparse.model.matrix(~ factor(x$txid) - 1)
55+
uniqueCounts <- t(uniqueCounts.tx) %*% uniqueCounts
56+
rownames(uniqueCounts) <- names(annotations)[match(as.numeric(levels(factor(x$txid))),mcols(annotations)$txid)]
57+
counts <- sparseMatrix(length(annotations), ncol(uniqueCounts), x = 0)
58+
rownames(counts) <- names(annotations)
59+
counts[rownames(uniqueCounts),] <- uniqueCounts
5660
return(counts)
57-
58-
counts.total = colSums(countMatrix) + colSums(incompatibleCountMatrix)
59-
counts.total[counts.total==0] = 1
60-
counts.CPM = counts/counts.total * 10^6
61+
62+
# these three lines appear after return, so it's not used, is this used for debug only?
63+
# counts.total = colSums(countMatrix) + colSums(incompatibleCountMatrix)
64+
# counts.total[counts.total==0] = 1
65+
# counts.CPM = counts/counts.total * 10^6
6166

6267
}
6368

69+
70+
#' Generate incompatible counts
71+
#' @noRd
6472
generateIncompatibleCounts <- function(incompatibleCountMatrix, annotations){
65-
genes = levels(factor(unique(mcols(annotations)$GENEID)))
66-
rownames(incompatibleCountMatrix) = genes[as.numeric(rownames(incompatibleCountMatrix))]
67-
geneMat = sparseMatrix(length(genes), ncol(incompatibleCountMatrix), x = 0)
68-
rownames(geneMat) = genes
69-
geneMat[rownames(incompatibleCountMatrix),] = incompatibleCountMatrix
73+
genes <- levels(factor(unique(mcols(annotations)$GENEID)))
74+
rownames(incompatibleCountMatrix) <- genes[as.numeric(rownames(incompatibleCountMatrix))]
75+
geneMat <- sparseMatrix(length(genes), ncol(incompatibleCountMatrix), x = 0)
76+
rownames(geneMat) <- genes
77+
geneMat[rownames(incompatibleCountMatrix),] <- incompatibleCountMatrix
7078
return(geneMat)
7179
}
7280

81+
82+
#' Generate non-unique counts
83+
#' @noRd
7384
generateNonUniqueCounts <- function(readClassDt, countMatrix, annotations){
7485
#fuse multi align RCs by gene
75-
x = readClassDt %>% filter(multi_align & !is.na(eqClass.match))
76-
x = x %>% distinct(eqClassId, .keep_all = TRUE)
77-
nonuniqueCounts = countMatrix[x$eqClass.match,, drop = FALSE]
86+
x <- readClassDt %>% filter(multi_align & !is.na(eqClass.match))
87+
x <- x %>% distinct(eqClassId, .keep_all = TRUE)
88+
nonuniqueCounts <- countMatrix[x$eqClass.match,, drop = FALSE]
7889
if(nrow(x)>1 & length(unique(x$gene_sid))>1){
79-
nonuniqueCounts.gene = sparse.model.matrix(~ factor(x$gene_sid) - 1)
80-
nonuniqueCounts = t(nonuniqueCounts.gene) %*% nonuniqueCounts
90+
nonuniqueCounts.gene <- sparse.model.matrix(~ factor(x$gene_sid) - 1)
91+
nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
8192
} else{
8293
warning("The factor variable 'gene_sid' has only one level. Adjusting output.")
83-
nonuniqueCounts.gene = Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE)
84-
nonuniqueCounts = t(nonuniqueCounts.gene) %*% nonuniqueCounts
94+
nonuniqueCounts.gene <- Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE)
95+
nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
8596
}
8697
#covert ids into gene ids
87-
geneids = as.numeric(levels(factor(x$gene_sid)))
88-
geneids = x$txid[match(geneids, x$gene_sid)]
89-
geneids = mcols(annotations)$GENEID[as.numeric(geneids)]
90-
rownames(nonuniqueCounts) = geneids
98+
geneids <- as.numeric(levels(factor(x$gene_sid)))
99+
geneids <- x$txid[match(geneids, x$gene_sid)]
100+
geneids <- mcols(annotations)$GENEID[as.numeric(geneids)]
101+
rownames(nonuniqueCounts) <- geneids
91102
#create matrix for all annotated genes
92-
genes = levels(factor(unique(mcols(annotations)$GENEID)))
93-
geneMat = sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
94-
rownames(geneMat) = genes
95-
geneMat[rownames(nonuniqueCounts),] = nonuniqueCounts
103+
genes <- levels(factor(unique(mcols(annotations)$GENEID)))
104+
geneMat <- sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
105+
rownames(geneMat) <- genes
106+
geneMat[rownames(nonuniqueCounts),] <- nonuniqueCounts
96107
return(geneMat)
97108
}

R/bambu-extendAnnotations-utilityCombine.R

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ isore.combineTranscriptCandidates <- function(readClassList,
1919
min.readCount, min.readFractionByGene,
2020
min.txScore.multiExon, min.txScore.singleExon, verbose) %>% data.table()
2121
combinedSplicedTranscripts[,confidenceType := "highConfidenceJunctionReads"]
22-
if (min.txScore.singleExon == 1) {return(combinedSplicedTranscripts)}
22+
# when single exon min score is greater than 1, skip unspliced transcripts combination
23+
# this is a very customized config, useful when data is very big
24+
if (min.txScore.singleExon > 1)
25+
return(combinedSplicedTranscripts)
2326
combinedUnsplicedTranscripts <-
2427
combineUnsplicedTranscriptModels(readClassList, bpParameters,
2528
stranded, min.readCount, min.readFractionByGene,
@@ -36,11 +39,11 @@ isore.combineTranscriptCandidates <- function(readClassList,
3639
combineSplicedTranscriptModels <- function(readClassList, bpParameters,
3740
min.readCount, min.readFractionByGene, min.txScore.multiExon,
3841
min.txScore.singleExon, verbose){
39-
bpParameters$progressbar = FALSE
42+
bpParameters$progressbar <- FALSE
4043
options(scipen = 999) #maintain numeric basepair locations not sci.notfi.
4144
start.ptm <- proc.time()
4245
n_sample <- length(readClassList)
43-
nGroups = max(ceiling(n_sample/10),min(bpworkers(bpParameters),
46+
nGroups <- max(ceiling(n_sample/10),min(bpworkers(bpParameters),
4447
round(n_sample/2)))
4548
indexList <- sample(rep(seq_len(nGroups), length.out=n_sample))
4649
indexList <- splitAsList(seq_len(n_sample), indexList)
@@ -135,7 +138,7 @@ combineFeatureTibble <- function(combinedFeatureTibble,
135138
maxTxScore.noFit, NSampleReadCount, NSampleReadProp,NSampleTxScore,
136139
starts_with('start'), starts_with('end'), starts_with('readCount'))
137140
} else {
138-
combinedTable = full_join(combinedFeatureTibble,
141+
combinedTable <- full_join(combinedFeatureTibble,
139142
featureTibbleSummarised, by = c('intronStarts', 'intronEnds', 'chr',
140143
'strand'), suffix=c('.combined','.new')) %>%
141144
mutate(NSampleReadCount=pmax0NA(NSampleReadCount.combined) +
@@ -215,7 +218,7 @@ combineUnsplicedTranscriptModels <-
215218
min.readFractionByGene, min.txScore.multiExon,
216219
min.txScore.singleExon, verbose){
217220
start.ptm <- proc.time()
218-
bpParameters$progressbar = FALSE
221+
bpParameters$progressbar <- FALSE
219222
newUnsplicedSeList <-
220223
bplapply(seq_along(readClassList), function(sample_id)
221224
extractNewUnsplicedRanges(readClassSe =
@@ -292,7 +295,7 @@ reduceUnsplicedRanges <- function(rangesList, stranded){
292295
makeUnsplicedTibble <- function(combinedNewUnsplicedSe,newUnsplicedSeList,
293296
colDataNames,min.readCount, min.readFractionByGene,
294297
min.txScore.multiExon, min.txScore.singleExon, bpParameters){
295-
bpParameters$progressbar = FALSE
298+
bpParameters$progressbar <- FALSE
296299
newUnsplicedTibble <- as_tibble(combinedNewUnsplicedSe) %>%
297300
rename(chr = seqnames) %>% select(chr, start, end, strand, row_id) %>%
298301
separate_rows(row_id, sep = "\\+")

0 commit comments

Comments
 (0)