GoekeLab
diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R
Lines changed: 65 additions & 54 deletions
diff --git a/R/bambu-extendAnnotations-utilityCombine.R b/R/bambu-extendAnnotations-utilityCombine.R
Lines changed: 9 additions & 6 deletions
@@ -7,91 +7,102 @@ assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParame
                                         returnDistTable = FALSE, trackReads = TRUE) {
     if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
     metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable)
-    readClassList = splitReadClassFiles(readClassList)
+    readClassList <- splitReadClassFiles(readClassList)
     readClassDt <- genEquiRCs(metadata(readClassList)$readClassDist, annotations, verbose) 
     readClassDt$eqClass.match = match(readClassDt$eqClassById,metadata(readClassList)$eqClassById)
     readClassDt <- simplifyNames(readClassDt)
-    readClassDt = readClassDt %>% group_by(eqClassId, gene_sid) %>% 
-        mutate(multi_align = length(unique(txid))>1) %>% ungroup() %>% mutate(aval = 1) %>%
+    readClassDt <- readClassDt %>% group_by(eqClassId, gene_sid) %>% 
+        mutate(multi_align = length(unique(txid))>1) %>% 
+        ungroup() %>% 
+        mutate(aval = 1) %>%
         data.table()
     #return non-em counts
-    ColData = generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
+    ColData <- generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
     quantData <- SummarizedExperiment(assays = SimpleList(
         counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
         rowRanges = annotations,
         colData = ColData)
-    colnames(quantData) = ColData$id
+    colnames(quantData) <- ColData$id
     if(sum(metadata(readClassList)$incompatibleCountMatrix)==0){
-        metadata(quantData)$incompatibleCounts = NULL
-    } else{
-        metadata(quantData)$incompatibleCounts = generateIncompatibleCounts(metadata(readClassList)$incompatibleCountMatrix, annotations)       
-    }
-    metadata(quantData)$nonuniqueCounts = generateNonUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)
-    metadata(quantData)$readClassDt = readClassDt
-    metadata(quantData)$countMatrix = metadata(readClassList)$countMatrix
-    metadata(quantData)$incompatibleCountMatrix  = metadata(readClassList)$incompatibleCountMatrix 
-    metadata(quantData)$sampleNames = metadata(readClassList)$sampleNames 
-    if(returnDistTable){
-        metadata(quantData)$distTable = metadata(metadata(readClassList)$readClassDist)$distTableOld
+        metadata(quantData)$incompatibleCounts <- NULL
+    }else{
+        metadata(quantData)$incompatibleCounts <- generateIncompatibleCounts(metadata(readClassList)$incompatibleCountMatrix, annotations)       
     }
-    if (trackReads){
-        metadata(quantData)$readToTranscriptMap = 
-            generateReadToTranscriptMap(readClassList, metadata(readClassList)$readClassDist, 
-                                  annotations)
-    } 
+    metadata(quantData)$nonuniqueCounts <- generateNonUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)
+    metadata(quantData)$readClassDt <- readClassDt
+    metadata(quantData)$countMatrix <- metadata(readClassList)$countMatrix
+    metadata(quantData)$incompatibleCountMatrix <- metadata(readClassList)$incompatibleCountMatrix 
+    metadata(quantData)$sampleNames <- metadata(readClassList)$sampleNames 
+    if(returnDistTable)
+        metadata(quantData)$distTable <- metadata(metadata(readClassList)$readClassDist)$distTableOld
+
+    if(trackReads)
+        metadata(quantData)$readToTranscriptMap <- 
+            generateReadToTranscriptMap(readClassList, 
+                                        metadata(readClassList)$readClassDist, 
+                                        annotations)
+
     return(quantData)     
 
 }
 
-
+#' Generate unique counts
+#' @noRd
 generateUniqueCounts <- function(readClassDt, countMatrix, annotations){
-    x = readClassDt %>% filter(!multi_align & !is.na(eqClass.match))
-    uniqueCounts = countMatrix[x$eqClass.match,]
-    uniqueCounts.tx = sparse.model.matrix(~ factor(x$txid) - 1)
-    uniqueCounts = t(uniqueCounts.tx) %*% uniqueCounts
-    rownames(uniqueCounts) = names(annotations)[match(as.numeric(levels(factor(x$txid))),mcols(annotations)$txid)]
-    counts = sparseMatrix(length(annotations), ncol(uniqueCounts), x = 0)
-    rownames(counts) = names(annotations)
-    counts[rownames(uniqueCounts),] = uniqueCounts
+    x <- readClassDt %>% filter(!multi_align & !is.na(eqClass.match))
+    uniqueCounts <- countMatrix[x$eqClass.match,]
+    uniqueCounts.tx <- sparse.model.matrix(~ factor(x$txid) - 1)
+    uniqueCounts <- t(uniqueCounts.tx) %*% uniqueCounts
+    rownames(uniqueCounts) <- names(annotations)[match(as.numeric(levels(factor(x$txid))),mcols(annotations)$txid)]
+    counts <- sparseMatrix(length(annotations), ncol(uniqueCounts), x = 0)
+    rownames(counts) <- names(annotations)
+    counts[rownames(uniqueCounts),] <- uniqueCounts
     return(counts)
-
-    counts.total = colSums(countMatrix) + colSums(incompatibleCountMatrix)
-    counts.total[counts.total==0] = 1
-    counts.CPM = counts/counts.total * 10^6
+    
+    # NOTE: the three lines below come after return(), so they never run; were they kept only for debugging?
+    # counts.total = colSums(countMatrix) + colSums(incompatibleCountMatrix)
+    # counts.total[counts.total==0] = 1
+    # counts.CPM = counts/counts.total * 10^6
 
 }
 
+
+#' Generate incompatible counts
+#' @noRd
 generateIncompatibleCounts <- function(incompatibleCountMatrix, annotations){
-    genes = levels(factor(unique(mcols(annotations)$GENEID)))
-    rownames(incompatibleCountMatrix) = genes[as.numeric(rownames(incompatibleCountMatrix))]
-    geneMat = sparseMatrix(length(genes), ncol(incompatibleCountMatrix), x = 0)
-    rownames(geneMat) = genes
-    geneMat[rownames(incompatibleCountMatrix),] = incompatibleCountMatrix
+    genes <- levels(factor(unique(mcols(annotations)$GENEID)))
+    rownames(incompatibleCountMatrix) <- genes[as.numeric(rownames(incompatibleCountMatrix))]
+    geneMat <- sparseMatrix(length(genes), ncol(incompatibleCountMatrix), x = 0)
+    rownames(geneMat) <- genes
+    geneMat[rownames(incompatibleCountMatrix),] <- incompatibleCountMatrix
     return(geneMat)
 }
 
+
+#' Generate non-unique counts
+#' @noRd
 generateNonUniqueCounts <- function(readClassDt, countMatrix, annotations){
     #fuse multi align RCs by gene
-    x = readClassDt %>% filter(multi_align & !is.na(eqClass.match))
-    x = x %>% distinct(eqClassId, .keep_all = TRUE)
-    nonuniqueCounts = countMatrix[x$eqClass.match,, drop = FALSE]
+    x <- readClassDt %>% filter(multi_align & !is.na(eqClass.match))
+    x <- x %>% distinct(eqClassId, .keep_all = TRUE)
+    nonuniqueCounts <- countMatrix[x$eqClass.match,, drop = FALSE]
     if(nrow(x)>1 & length(unique(x$gene_sid))>1){
-        nonuniqueCounts.gene = sparse.model.matrix(~ factor(x$gene_sid) - 1)
-        nonuniqueCounts = t(nonuniqueCounts.gene) %*% nonuniqueCounts
+        nonuniqueCounts.gene <- sparse.model.matrix(~ factor(x$gene_sid) - 1)
+        nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
     } else{
         warning("The factor variable 'gene_sid' has only one level. Adjusting output.")
-        nonuniqueCounts.gene = Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE)
-        nonuniqueCounts = t(nonuniqueCounts.gene) %*% nonuniqueCounts
+        nonuniqueCounts.gene <- Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE)
+        nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts
     }
     #covert ids into gene ids
-    geneids = as.numeric(levels(factor(x$gene_sid)))
-    geneids = x$txid[match(geneids, x$gene_sid)]
-    geneids = mcols(annotations)$GENEID[as.numeric(geneids)]
-    rownames(nonuniqueCounts) = geneids
+    geneids <- as.numeric(levels(factor(x$gene_sid)))
+    geneids <- x$txid[match(geneids, x$gene_sid)]
+    geneids <- mcols(annotations)$GENEID[as.numeric(geneids)]
+    rownames(nonuniqueCounts) <- geneids
     #create matrix for all annotated genes
-    genes = levels(factor(unique(mcols(annotations)$GENEID)))
-    geneMat = sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
-    rownames(geneMat) = genes
-    geneMat[rownames(nonuniqueCounts),] = nonuniqueCounts
+    genes <- levels(factor(unique(mcols(annotations)$GENEID)))
+    geneMat <- sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
+    rownames(geneMat) <- genes
+    geneMat[rownames(nonuniqueCounts),] <- nonuniqueCounts
     return(geneMat)
 }
@@ -19,7 +19,10 @@ isore.combineTranscriptCandidates <- function(readClassList,
         min.readCount, min.readFractionByGene, 
         min.txScore.multiExon, min.txScore.singleExon, verbose) %>% data.table()
     combinedSplicedTranscripts[,confidenceType := "highConfidenceJunctionReads"]
-    if (min.txScore.singleExon == 1) {return(combinedSplicedTranscripts)}
+    # When min.txScore.singleExon is greater than 1, skip combining the unspliced transcripts.
+    # This is a highly customized setting that is useful when the data set is very large.
+    if (min.txScore.singleExon > 1) 
+        return(combinedSplicedTranscripts)
     combinedUnsplicedTranscripts <- 
         combineUnsplicedTranscriptModels(readClassList, bpParameters, 
         stranded, min.readCount, min.readFractionByGene, 
@@ -36,11 +39,11 @@ isore.combineTranscriptCandidates <- function(readClassList,
 combineSplicedTranscriptModels <- function(readClassList, bpParameters, 
         min.readCount, min.readFractionByGene, min.txScore.multiExon, 
         min.txScore.singleExon, verbose){
-    bpParameters$progressbar = FALSE
+    bpParameters$progressbar <- FALSE
     options(scipen = 999) #maintain numeric basepair locations not sci.notfi.
     start.ptm <- proc.time()
     n_sample <- length(readClassList)
-    nGroups = max(ceiling(n_sample/10),min(bpworkers(bpParameters), 
+    nGroups <- max(ceiling(n_sample/10),min(bpworkers(bpParameters), 
                                             round(n_sample/2)))
     indexList <- sample(rep(seq_len(nGroups), length.out=n_sample))
     indexList <- splitAsList(seq_len(n_sample), indexList)
@@ -135,7 +138,7 @@ combineFeatureTibble <- function(combinedFeatureTibble,
             maxTxScore.noFit, NSampleReadCount, NSampleReadProp,NSampleTxScore, 
             starts_with('start'), starts_with('end'), starts_with('readCount'))
     } else { 
-        combinedTable = full_join(combinedFeatureTibble, 
+        combinedTable <- full_join(combinedFeatureTibble, 
             featureTibbleSummarised, by = c('intronStarts', 'intronEnds', 'chr',
             'strand'), suffix=c('.combined','.new')) %>% 
             mutate(NSampleReadCount=pmax0NA(NSampleReadCount.combined) + 
@@ -215,7 +218,7 @@ combineUnsplicedTranscriptModels <-
             min.readFractionByGene, min.txScore.multiExon,
             min.txScore.singleExon, verbose){
         start.ptm <- proc.time()
-        bpParameters$progressbar = FALSE
+        bpParameters$progressbar <- FALSE
         newUnsplicedSeList <- 
             bplapply(seq_along(readClassList), function(sample_id)
                 extractNewUnsplicedRanges(readClassSe = 
@@ -292,7 +295,7 @@ reduceUnsplicedRanges <- function(rangesList, stranded){
 makeUnsplicedTibble <- function(combinedNewUnsplicedSe,newUnsplicedSeList,
         colDataNames,min.readCount, min.readFractionByGene,
         min.txScore.multiExon, min.txScore.singleExon, bpParameters){
-        bpParameters$progressbar = FALSE
+        bpParameters$progressbar <- FALSE
     newUnsplicedTibble <- as_tibble(combinedNewUnsplicedSe) %>%
         rename(chr = seqnames) %>% select(chr, start, end, strand, row_id) %>%
         separate_rows(row_id, sep = "\\+")