Skip to content

Commit 8833e6d

Browse files
author
Sui Yue
authored
Merge pull request #469 from GoekeLab/Multiplex_Major_Patch
Multiplex major patch to singleExon_2
2 parents 7de4994 + 69d2bf8 commit 8833e6d

3 files changed

Lines changed: 20 additions & 28 deletions

File tree

R/bambu-processReads.R

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -84,14 +84,14 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
8484

8585
mcols(readGrgList[[i]])$BC <- as.factor(mcols(readGrgList[[i]])$BC)
8686

87-
if(!isFALSE(demultiplexed)){
88-
mcols(readGrgList[[i]])$sampleID <- as.numeric(mcols(readGrgList[[i]])$BC)
89-
} else {
90-
mcols(readGrgList[[i]])$sampleID <- i
91-
}
9287
}
9388
readGrgList <- do.call(c, readGrgList)
9489
mcols(readGrgList)$id <- seq_along(readGrgList)
90+
if(!isFALSE(demultiplexed)){
91+
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$BC)
92+
} else {
93+
mcols(readGrgList)$sampleID <- i
94+
}
9595
readClassList <- constructReadClasses(readGrgList, genomeSequence = genomeSequence,annotations = annotations,
9696
stranded = stranded, min.readCount = min.readCount,
9797
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,

R/bambu.R

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -264,10 +264,8 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
264264
ColNames <- c()
265265
for(i in seq_along(quantData)){
266266
quantData_i <- quantData[[i]]
267-
#load in the barcode clustering from file if provided
268-
iter <- seq_len(ncol(metadata(quantData_i)$countMatrix)) # iter is integer
269267
if(!is.null(clusters)){
270-
if(class(clusters)!="CompressedCharacterList"){ # !is.list(clusters) is FALSE for CompressedCharacterList
268+
if(class(clusters[[i]])!="CompressedCharacterList"){ # !is.list(clusters) is FALSE for CompressedCharacterList
271269
clusterMaps <- NULL
272270
for(j in seq_along(metadata(quantData_i)$sampleNames)){ #load in a file per sample name provided
273271
clusterMap <- fread(clusters[[j]], header = FALSE,
@@ -284,14 +282,9 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
284282
rm(clusterMap)
285283
iter <- clustering
286284

287-
} else{ #if clusters is a list
288-
if(length(quantData)>1){
289-
iter <- clusters[[i]] #lowMemory mode
290-
}else{
291-
iter <- clusters#do.call(c,clusters)
292-
}
285+
} else{
286+
iter <- clusters[[i]]
293287
}
294-
}
295288
countsSeCompressed <- bplapply(iter, FUN = function(j){ # previous i changed to j to avoid duplicated assignment
296289
#i = iter[i %in% colnames(metadata(quantData_i)$countMatrix)] #bug, after assignment, i become emptyprint(i)
297290
countMatrix <- unname(metadata(quantData_i)$countMatrix[,j]) # same here
@@ -331,4 +324,5 @@ bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL,
331324
colnames(countsSe) <- ColData[,1]
332325
return(countsSe)
333326
}
334-
}
327+
}
328+
}

R/prepareDataFromBam.R

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -45,19 +45,17 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
4545
readGrgList[[counter]] <-grglist(alignmentInfo)
4646
if (!isFALSE(demultiplexed)){ # if demultiplexed is TRUE or a string path
4747
if(isTRUE(demultiplexed)){ # if demultiplexed is TRUE
48-
mcols(readGrgList[[counter]])$BC <- ifelse(!is.na(mcols(alignmentInfo)$BC),
49-
mcols(alignmentInfo)$BC,
50-
ifelse(grepl("[GACT]_",names(readGrgList[[counter]])), # a checkpoint to see whether BC is contained in the name, with specific format BC_UMI#READNAME
51-
gsub("(^[GACT]+(?=_)).*", '\\1',
52-
names(readGrgList[[counter]]), perl = TRUE),
53-
NA))
54-
mcols(readGrgList[[counter]])$UMI <- ifelse(!is.na(mcols(alignmentInfo)$UG), mcols(alignmentInfo)$UG,
55-
ifelse(grepl("[GACT]#",names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format BC_UMI#READNAME
56-
gsub(".*((?<=_)[GACT]*(?=#)).*", '\\1', names(readGrgList[[counter]]), perl = TRUE),
57-
NA))
48+
49+
mcols(readGrgList[[counter]])$BC <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), # a checkpoint to see whether BC is contained in the name, with specific format BC_UMI#READNAME,
50+
!is.na(mcols(alignmentInfo)$BC) ~ mcols(alignmentInfo)$BC,
51+
TRUE ~ NA)
52+
53+
mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format BC_UMI#READNAME,
54+
!is.na(mcols(alignmentInfo)$UG) ~ mcols(alignmentInfo)$UG,
55+
TRUE ~ NA)
5856
} else{ # if demultiplexed is a string path
5957
mcols(readGrgList[[counter]])$BC <- NA
60-
mcols(readGrgList[[counter]])$UMI <- "NA"
58+
mcols(readGrgList[[counter]])$UMI <- NA
6159
mcols(readGrgList[[counter]])$BC <- readMap[,2][match(names(readGrgList[[counter]]),readMap[,1])]
6260
if(ncol(readMap)>2){
6361
mcols(readGrgList[[counter]])$UMI <- readMap[,3][match(names(readGrgList[[counter]]),readMap[,1])]
@@ -154,4 +152,4 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
154152
clipFunction <- function(cigarData, grep_pattern, replace_pattern){
155153
return(suppressWarnings(pmax(0,as.numeric(gsub(grep_pattern,replace_pattern,
156154
cigarData)), na.rm=T)))
157-
}
155+
}

0 commit comments

Comments
 (0)