Skip to content

Commit a9b1a1a

Browse files
committed
fix bam syntax
1 parent 14081c6 commit a9b1a1a

2 files changed

Lines changed: 27 additions & 27 deletions

File tree

R/bambu-processReads.R

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -77,18 +77,18 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
7777
sampleNames <- as.numeric(as.factor(sampleNames))
7878
for(i in seq_along(readGrgList)){
7979
if(!isFALSE(demultiplexed)){
80-
mcols(readGrgList[[i]])$BC <- paste0(names(reads)[i], '_', mcols(readGrgList[[i]])$BC)
80+
mcols(readGrgList[[i]])$CB <- paste0(names(reads)[i], '_', mcols(readGrgList[[i]])$CB)
8181
} else{
82-
mcols(readGrgList[[i]])$BC <- sampleNames[i]
82+
mcols(readGrgList[[i]])$CB <- sampleNames[i]
8383
}
8484

85-
mcols(readGrgList[[i]])$BC <- as.factor(mcols(readGrgList[[i]])$BC)
85+
mcols(readGrgList[[i]])$CB <- as.factor(mcols(readGrgList[[i]])$CB)
8686

8787
}
8888
readGrgList <- do.call(c, readGrgList)
8989
mcols(readGrgList)$id <- seq_along(readGrgList)
9090
if(!isFALSE(demultiplexed)){
91-
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$BC)
91+
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
9292
} else {
9393
mcols(readGrgList)$sampleID <- i
9494
}
@@ -99,7 +99,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
9999
processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode)
100100
metadata(readClassList)$samples <- names(reads)
101101
metadata(readClassList)$sampleNames <- names(reads)
102-
if(!isFALSE(demultiplexed)) metadata(readClassList)$samples <- levels(mcols(readGrgList)$BC)
102+
if(!isFALSE(demultiplexed)) metadata(readClassList)$samples <- levels(mcols(readGrgList)$CB)
103103
readClassList <- list(readClassList)
104104
}
105105

@@ -131,7 +131,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
131131
if(verbose) message(paste0("Number of alignments/reads: ",length(readGrgList)))
132132
warnings <- c()
133133
if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed))
134-
readGrgList <- readGrgList[!(mcols(readGrgList)$BC %in% barcodesToFilter)]
134+
readGrgList <- readGrgList[!(mcols(readGrgList)$CB %in% barcodesToFilter)]
135135
warnings <- seqlevelCheckReadsAnnotation(readGrgList, annotations)
136136
if(verbose & length(warnings) > 0) warning(paste(warnings,collapse = "\n"))
137137
#check seqlevels for consistency, drop ranges not present in genomeSequence
@@ -175,13 +175,13 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
175175

176176
sampleName <- names(bam.file)[1]
177177
if(!isFALSE(demultiplexed)){
178-
mcols(readGrgList)$BC <- paste0(sampleName, '_', mcols(readGrgList)$BC)
178+
mcols(readGrgList)$CB <- paste0(sampleName, '_', mcols(readGrgList)$CB)
179179
} else{
180-
mcols(readGrgList)$BC <- sampleName
180+
mcols(readGrgList)$CB <- sampleName
181181
}
182-
mcols(readGrgList)$BC <- as.factor(mcols(readGrgList)$BC)
182+
mcols(readGrgList)$CB <- as.factor(mcols(readGrgList)$CB)
183183
if(!isFALSE(demultiplexed)){
184-
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$BC)
184+
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
185185
} else {
186186
mcols(readGrgList)$sampleID <- index
187187
}
@@ -219,7 +219,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
219219

220220
metadata(se)$samples <- names(bam.file)[1]
221221
metadata(se)$sampleNames <- names(bam.file)[1]
222-
if(!isFALSE(demultiplexed)) metadata(se)$samples <- levels(mcols(readGrgList)$BC)
222+
if(!isFALSE(demultiplexed)) metadata(se)$samples <- levels(mcols(readGrgList)$CB)
223223
return(se)
224224
}
225225

@@ -234,7 +234,7 @@ bambu.readsByFile <- function(bam.file, genomeSequence, annotations,
234234
cleanReads = TRUE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) {
235235
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI)
236236

237-
if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed)) readGrgList <- readGrgList[!mcols(readGrgList)$BC %in% barcodesToFilter]
237+
if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed)) readGrgList <- readGrgList[!mcols(readGrgList)$CB %in% barcodesToFilter]
238238

239239
if(verbose) message("Number of alignments/reads: ",length(readGrgList))
240240

R/prepareDataFromBam.R

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,30 +39,30 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
3939
}
4040
}
4141
while (isIncomplete(bf)) {
42-
alignmentInfo <- readGAlignments(bf, param = ScanBamParam(tag = c("BC", "UG"),
42+
alignmentInfo <- readGAlignments(bf, param = ScanBamParam(tag = c("CB", "UB"),
4343
flag = scanBamFlag(isSecondaryAlignment = FALSE)),
4444
use.names = use.names)
4545
readGrgList[[counter]] <-grglist(alignmentInfo)
4646
if (!isFALSE(demultiplexed)){ # if demultiplexed is TRUE or a string path
4747
if(isTRUE(demultiplexed)){ # if demultiplexed is TRUE
4848

49-
mcols(readGrgList[[counter]])$BC <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), # a checkpoint to see whether BC is contained in the name, with specific format BC_UMI#READNAME,
50-
!is.na(mcols(alignmentInfo)$BC) ~ mcols(alignmentInfo)$BC,
49+
mcols(readGrgList[[counter]])$CB <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), # a checkpoint to see whether CB is contained in the name, with specific format CB_UMI#READNAME,
50+
!is.na(mcols(alignmentInfo)$CB) ~ mcols(alignmentInfo)$CB,
5151
TRUE ~ NA)
5252

53-
mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format BC_UMI#READNAME,
54-
!is.na(mcols(alignmentInfo)$UG) ~ mcols(alignmentInfo)$UG,
53+
mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format CB_UMI#READNAME,
54+
!is.na(mcols(alignmentInfo)$UB) ~ mcols(alignmentInfo)$UB,
5555
TRUE ~ NA)
5656
} else{ # if demultiplexed is a string path
57-
mcols(readGrgList[[counter]])$BC <- NA
57+
mcols(readGrgList[[counter]])$CB <- NA
5858
mcols(readGrgList[[counter]])$UMI <- NA
59-
mcols(readGrgList[[counter]])$BC <- readMap[,2][match(names(readGrgList[[counter]]),readMap[,1])]
59+
mcols(readGrgList[[counter]])$CB <- readMap[,2][match(names(readGrgList[[counter]]),readMap[,1])]
6060
if(ncol(readMap)>2){
6161
mcols(readGrgList[[counter]])$UMI <- readMap[,3][match(names(readGrgList[[counter]]),readMap[,1])]
6262
}
6363
}
64-
cells <- unique(c(cells, mcols(readGrgList[[counter]])$BC))
65-
mcols(readGrgList[[counter]])$BC <- factor(mcols(readGrgList[[counter]])$BC, levels = cells)
64+
cells <- unique(c(cells, mcols(readGrgList[[counter]])$CB))
65+
mcols(readGrgList[[counter]])$CB <- factor(mcols(readGrgList[[counter]])$CB, levels = cells)
6666
umi <- unique(c(umi, mcols(readGrgList[[counter]])$UMI))
6767
mcols(readGrgList[[counter]])$UMI <- factor(mcols(readGrgList[[counter]])$UMI, levels = umi)
6868
}
@@ -96,10 +96,10 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
9696
}
9797
# remove microexons of width 1bp from list
9898
readGrgList <- readGrgList <- readGrgList[sum(width(readGrgList)) > 1]
99-
numNoBCs <- sum(is.na(mcols(readGrgList)$BC))
100-
if(numNoBCs > 0){
101-
message("Removing ", numNoBCs, " reads that were not assigned barcodes. If this is unexpected check the barcode map input")
102-
readGrgList <- readGrgList[!is.na(mcols(readGrgList)$BC)]
99+
numNoCBs <- sum(is.na(mcols(readGrgList)$CB))
100+
if(numNoCBs > 0){
101+
message("Removing ", numNoCBs, " reads that were not assigned barcodes. If this is unexpected check the barcode map input")
102+
readGrgList <- readGrgList[!is.na(mcols(readGrgList)$CB)]
103103
}
104104
if(cleanReads){
105105
#extract duplicated reads from flexiplex to clean
@@ -127,8 +127,8 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE,
127127
start.ptm <- proc.time()
128128
numUMIs <- length(na.omit(unique(mcols(readGrgList)$UMI))) # remove NA UMIs
129129
if(numUMIs > 100){
130-
df <- data.frame(umi = mcols(readGrgList)$BC,
131-
barcode = mcols(readGrgList)$UMI,
130+
df <- data.frame(umi = mcols(readGrgList)$UMI,
131+
barcode = mcols(readGrgList)$CB,
132132
lengths = sum(width(readGrgList)))
133133
df <- df %>% mutate(id = row_number()) %>% group_by(barcode, umi) %>% summarise(primary.id = id[which.max(lengths)])
134134
readGrgList <- readGrgList[df$primary.id]

0 commit comments

Comments
 (0)