Skip to content

Commit 1ff04d3

Browse files
committed
Merge branch 'devel' into Documentation_update
2 parents 02d2d50 + db3172c commit 1ff04d3

16 files changed

Lines changed: 79 additions & 61 deletions

.github/workflows/check-bioc.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ jobs:
5454
fail-fast: false
5555
matrix:
5656
config:
57-
- { os: ubuntu-latest, r: '4.3', bioc: '3.17', cont: "bioconductor/bioconductor_docker:RELEASE_3_17", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" }
58-
- { os: macOS-latest, r: '4.3', bioc: '3.17'}
59-
- { os: windows-latest, r: '4.3', bioc: '3.17'}
57+
- { os: ubuntu-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
58+
- { os: macOS-latest, r: '4.3', bioc: '3.18'}
59+
##- { os: windows-latest, r: '4.3', bioc: '3.18'}
6060
## Check https://github.com/r-lib/actions/tree/master/examples
6161
## for examples using the http-user-agent
6262
env:

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: bambu
22
Type: Package
33
Title: Context-Aware Transcript Quantification from Long Read RNA-Seq data
4-
Version: 3.3.2
4+
Version: 3.5.1
55
Authors@R: c(person("Ying", "Chen", role = c("cre","aut"),
66
email = "chen_ying@gis.a-star.edu.sg"),
77
person("Andre", "Sim", role = "aut",
@@ -57,6 +57,7 @@ biocViews:
5757
GenomeAnnotation,
5858
GenomeAssembly,
5959
ImmunoOncology,
60+
LongRead,
6061
MultipleComparison,
6162
Normalization,
6263
RNASeq,

R/bambu-processReads_utilityConstructReadClasses.R

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,12 @@ constructUnsplicedReadClasses <- function(reads.singleExon, annotations,
246246
counts = as.data.frame(reads.singleExon) %>%
247247
mutate(id = mcols(reads.singleExon)$id) %>%
248248
group_by(seqnames,start,end,strand) %>%
249-
summarise(n=n(), id = list(id)) %>%
249+
mutate(n=n(), id = list(id)) %>% # change summarise to mutate as summarise will reorder the table
250+
ungroup() %>%
250251
as.data.frame()
251-
reads.singleExon = unique(reads.singleExon)
252252
mcols(reads.singleExon)$counts <- counts$n
253253
mcols(reads.singleExon)$id <- counts$id
254+
reads.singleExon = unique(reads.singleExon)
254255

255256
rcUnsplicedAnnotation <- getUnsplicedReadClassByReference(
256257
granges = reads.singleExon, grangesReference = referenceExons,
@@ -385,10 +386,12 @@ assignGeneIds <- function(grl, annotations, min.exonOverlap = 10, fusionMode =
385386
fusionMode = fusionMode)
386387
#iteratively assign gene ids for stranded granges
387388
newGeneSet <- is.na(mcols(grl)$GENEID) & strandedRanges
388-
referenceGeneSet <- !is.na(mcols(grl)$GENEID) & strandedRanges
389-
mcols(grl)$GENEID[newGeneSet] <- assignGeneIdsByReference(grl[newGeneSet], grl[referenceGeneSet],
390-
min.exonOverlap = min.exonOverlap,
391-
fusionMode = FALSE) # fusion assignment is only done based on original annotations
389+
if(sum(newGeneSet != 0)){
390+
referenceGeneSet <- !is.na(mcols(grl)$GENEID) & strandedRanges
391+
mcols(grl)$GENEID[newGeneSet] <- assignGeneIdsByReference(grl[newGeneSet], grl[referenceGeneSet],
392+
min.exonOverlap = min.exonOverlap,
393+
fusionMode = FALSE) # fusion assignment is only done based on original annotations
394+
}
392395
chainCount <- 1 # first iteration is outside of while loop
393396
while(any(!is.na(mcols(grl)$GENEID[newGeneSet])) & chainCount < maxChainIteration) {
394397
referenceGeneSet <- newGeneSet & !is.na(mcols(grl)$GENEID) & strandedRanges
@@ -440,32 +443,31 @@ assignGeneIdsByReference <- function(grl, annotations, min.exonOverlap = 10,
440443
uniqueHits <- which(queryHits(ov) %in% which(countQueryHits(ov)==1))
441444
geneIds[queryHits(ov)[uniqueHits]] <-
442445
names(geneRanges)[subjectHits(ov)[uniqueHits]]
443-
444-
## next for non unique hits select one gene (maximum overlap)
445-
multiHits <- which(queryHits(ov) %in% which(countQueryHits(ov)>1))
446-
expandedRanges <- expandRangesList(ranges(grl[queryHits(ov)[multiHits]]),
447-
ranges(geneRanges[subjectHits(ov)[multiHits]]))
448-
rangeIntersect <- pintersect(expandedRanges,
449-
mcols(expandedRanges)$matchRng, resolve.empty = 'start.x')
450-
intersectById <- tapply(width(rangeIntersect),
451-
mcols(expandedRanges)$IdMap, sum)
452-
453-
filteredMultiHits <- as_tibble(ov[multiHits]) %>%
454-
mutate(intersectWidth = intersectById)
455-
if(fusionMode) {
456-
filteredMultiHits <- filteredMultiHits %>%
457-
filter(intersectWidth>min.exonOverlap) %>%
458-
mutate(geneid = names(geneRanges)[subjectHits]) %>% distinct() %>%
459-
group_by(queryHits) %>% summarise(geneid = paste(geneid, collapse=':'))
460-
geneIds[filteredMultiHits$queryHits] <- filteredMultiHits$geneid
461-
462-
} else {
463-
filteredMultiHits <- filteredMultiHits %>%
464-
group_by(queryHits) %>% arrange(desc(intersectWidth)) %>%
465-
dplyr::slice(1)
466-
geneIds[filteredMultiHits$queryHits] <-
467-
names(geneRanges)[filteredMultiHits$subjectHits]
468-
}
446+
if(length(ov)>0){
447+
## next for non unique hits select one gene (maximum overlap)
448+
multiHits <- which(queryHits(ov) %in% which(countQueryHits(ov)>1))
449+
rangeIntersect= intersect(ranges(grl[queryHits(ov)[multiHits]]),
450+
ranges(geneRanges[subjectHits(ov)[multiHits]]))
451+
filteredMultiHits = data.frame(queryHits = queryHits(ov)[multiHits],
452+
intersectWidth = sum(width(rangeIntersect)),
453+
subjectHits = subjectHits(ov)[multiHits]) %>%
454+
group_by(queryHits) %>% summarise(subjectHits = subjectHits[which.max(intersectWidth)],
455+
intersectWidth = max(intersectWidth))
456+
if(fusionMode) {
457+
filteredMultiHits <- filteredMultiHits %>%
458+
filter(intersectWidth>min.exonOverlap) %>%
459+
mutate(geneid = names(geneRanges)[subjectHits]) %>% distinct() %>%
460+
group_by(queryHits) %>% summarise(geneid = paste(geneid, collapse=':'))
461+
geneIds[filteredMultiHits$queryHits] <- filteredMultiHits$geneid
462+
463+
} else {
464+
filteredMultiHits <- filteredMultiHits %>%
465+
group_by(queryHits) %>% arrange(desc(intersectWidth)) %>%
466+
dplyr::slice(1)
467+
geneIds[filteredMultiHits$queryHits] <-
468+
names(geneRanges)[filteredMultiHits$subjectHits]
469+
}
470+
}
469471
return(geneIds)
470472
}
471473

R/bambu-quantify_utilityFunctions.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ modifyIncompatibleAssignment <- function(distTable){
2020
#' Process incompatible counts
2121
#' @noRd
2222
processIncompatibleCounts <- function(readClassDist){
23-
distTable <- data.table(as.data.frame(metadata(readClassDist)$distTable))[,
24-
.(readClassId, annotationTxId, readCount, GENEID, dist,equal)]
23+
distTable <- unique(data.table(as.data.frame(metadata(readClassDist)$distTable))[,
24+
.(readClassId, annotationTxId, readCount, GENEID, equal)], by = NULL)
2525
distTableIncompatible <- distTable[grep("unidentified", annotationTxId)]
2626
# filter out multiple geneIDs mapped to the same readClass using rowData(se)
2727
geneRCMap <- as.data.table(as.data.frame(rowData(readClassDist)),

R/bambu_utilityFunctions.R

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ checkInputSequence <- function(genomeSequence) {
170170
},
171171
error=function(cond) {
172172
stop("Input genome file not readable.",
173-
"Requires a FASTA or BSgenome name")
173+
" Requires a FASTA or BSgenome name")
174174
}
175175
)}
176176
return(genomeSequence)
@@ -184,24 +184,27 @@ handleWarnings <- function(readClassList, verbose){
184184
sampleNames = c()
185185
for(i in seq_along(readClassList)){
186186
readClassSe = readClassList[[i]]
187-
if (is.character(readClassSe))
188-
readClassSe <- readRDS(file = readClassSe)
189-
warnings[[i]] = metadata(readClassSe)$warnings
187+
if (is.character(readClassSe)){
188+
readClassSe <- readRDS(file = readClassSe)}
189+
warnings[[i]] = NA
190+
if(!is.null(metadata(readClassSe)$warnings)){
191+
warnings[[i]] = metadata(readClassSe)$warnings}
190192
sampleNames = c(sampleNames, colnames(readClassList[[i]]))
191193
}
192194
names(warnings) = sampleNames
193195

194-
if(verbose & any(lengths(warnings)>0)){
196+
if(verbose & any(!is.na(warnings))){
195197
message("--- per sample warnings during read class construction ---")
196-
for(i in seq_along(warnings)){
197-
if(lengths(warnings)[i]>0){
198-
message("Warnings for: ", sampleNames[i])
199-
sapply(warnings[[i]], message)
200-
}
198+
warnings.tmp = warnings[!is.na(warnings)]
199+
for(i in seq_along(warnings.tmp)){
200+
message("Warnings for: ", names(warnings.tmp)[i])
201+
sapply(warnings.tmp[[i]], message)
201202
}
202203
} else {
203-
message("Detected ", sum(lengths(warnings)), " warnings across the samples during ",
204-
"read class construction. Access warnings with metadata(bambuOutput)$warnings")
204+
warningCount = sum(lengths(warnings[!is.na(warnings)]))
205+
if(warningCount > 0){
206+
message("Detected ", warningCount, " warnings across the samples during ",
207+
"read class construction. Access warnings with metadata(bambuOutput)$warnings")}
205208
}
206209
return(warnings)
207210
}

R/sysdata.rda

-7 Bytes
Binary file not shown.

README.md

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ The bambuAnnotation object can be calculated from:
113113

114114
a) a .gtf file:
115115
```rscript
116-
annotations <- prepareAnnotation(gtf.file)
116+
annotations <- prepareAnnotations(gtf.file)
117117
```
118118
b) a TxDb object
119119
```rscript
@@ -444,7 +444,7 @@ se <- bambu(reads = fusionAligned.bam, annotations = fusionAnnotations, genome =
444444
|reads|A string or a vector of strings specifying the paths of bam files for genomic alignments, or a BamFile object or a BamFileList object (from Rsamtools).|
445445
| rcOutDir | A string variable specifying the path to where read class files will be saved. |
446446
| annotations | A TxDb object, a path to a .gtf file, or a GRangesList object obtained by prepareAnnotations. |
447-
| genome | A fasta file or a BSGenome object. |
447+
| genome | A fasta file or a BSGenome object. If a fa.gz is provided, the .fai and .gzi must also be present |
448448
| stranded | A boolean for strandedness, defaults to FALSE. |
449449
| ncore | specifying number of cores used when parallel processing is used, defaults to 1. |
450450
| NDR | specifying the maximum NDR rate to novel transcript output among detected transcripts, defaults to 0.1 |
@@ -498,7 +498,19 @@ rowData(se)
498498

499499
### Release History
500500

501-
**bambu v3.2.6**
501+
**bambu v3.2.5**
502+
503+
Release date: 2023-July-07
504+
505+
Minor changes:
506+
507+
- Fix crash when extremely large datasets provided
508+
- Speed up read class construction
509+
- Add LongRead BiocView
510+
- Update release history
511+
512+
513+
**bambu v3.2.4**
502514

503515
Release date: 2023-Apr-26
504516

@@ -592,7 +604,7 @@ Release date: 2020-06-18
592604
Release date: 2020-05-29
593605

594606
### Citation
595-
Chen, Ying, et al. "Context-Aware Transcript Quantification from Long Read RNA-Seq data with Bambu" bioRxiv (2022). doi: https://doi.org/10.1101/2022.11.14.516358
607+
Chen, Y., Sim, A., Wan, Y.K. et al. Context-aware transcript quantification from long-read RNA-seq data with Bambu. Nat Methods (2023). https://doi.org/10.1038/s41592-023-01908-w
596608

597609
### Contributors
598610

inst/CITATION

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
citEntry(entry="article",
2-
title = "Context-Aware Transcript Quantification from Long Read RNA-Seq data with Bambu",
2+
title = "Context-aware transcript quantification from long-read RNA-seq data with Bambu",
33
author = personList( as.person("Ying Chen"),
44
as.person("Andre Sim"),
55
as.person("Yuk Kei Wan"),
@@ -8,10 +8,10 @@ citEntry(entry="article",
88
as.person("Min Hao Ling"),
99
as.person("Michael I. Love"),
1010
as.person("Jonathan Göke")),
11-
year = 2022,
12-
journal = "bioRxiv",
13-
doi = "https://doi.org/10.1101/2022.11.14.516358",
11+
year = 2023,
12+
journal = "Nature Methods",
13+
doi = "https://doi.org/10.1038/s41592-023-01908-w",
1414
textVersion =
15-
paste("Chen, Y., Sim, A. D., Wan, Y. K., Yeo, K., Lee, J. J. X., Ling, M. H., ... & Göke, J.",
16-
"Context-Aware Transcript Quantification from Long Read RNA-Seq data with Bambu",
17-
"bioRxiv (2022)" ) )
15+
paste("Chen, Y., Sim, A., Wan, Y. K., Yeo, K., Lee, J. J. X., Ling, M. H., Love, M. I. & Göke, J.",
16+
"Context-aware transcript quantification from long-read RNA-seq data with Bambu",
17+
"Nat Methods (2023)" ) )
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)