Skip to content

Commit b4277ca

Browse files
committed
Add summariseByExon function
1 parent fdf8db5 commit b4277ca

1 file changed

Lines changed: 76 additions & 0 deletions

File tree

R/summariseByExon.R

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#' Summarise transcript expression to exon-level expression
#' @title summarise by exon
#' @param se a \code{SummarizedExperiment} object from \code{\link{bambu}}.
#' It must have unique, non-NULL row names (used as transcript names), an
#' assay named \code{counts}, and a \code{GENEID} column in its
#' \code{rowData}.
#' @return A data.table with columns: exon_id, seqnames, start, end, strand,
#' GENEID, and one count column per sample. \code{exon_id} has the form
#' \code{seqnames:start:end:strand}. When an exon is shared by transcripts
#' of several genes, \code{GENEID} holds the sorted unique gene ids joined
#' by commas. Rows are ordered by seqnames, start and end.
#' @details Counts are summed across all transcripts that share the same exon
#' (defined by identical seqnames, start, end, and strand). The returned
#' counts therefore represent the total evidence attributed to each unique
#' exonic locus across all overlapping transcripts.
#'
#' Transcripts are matched to their exons by row name, so the function stops
#' with an error if the row names are missing or duplicated, or if the
#' \code{counts} assay or the \code{GENEID} column is absent.
#' @import data.table
#' @importFrom Matrix sparseMatrix
#' @importFrom SummarizedExperiment assays rowRanges rowData
#' @importFrom GenomicRanges seqnames start end strand
#' @export
summariseByExon <- function(se) {
    # Validate up front: everything below maps exons to transcripts by name,
    # so missing or duplicated names would otherwise corrupt the result or
    # fail with an unrelated-looking error deep inside the join.
    txNames <- rownames(se)
    if (is.null(txNames)) {
        stop("'se' must have row names (transcript names)", call. = FALSE)
    }
    if (anyDuplicated(txNames) > 0L) {
        stop("row names of 'se' must be unique", call. = FALSE)
    }
    txCounts <- assays(se)$counts
    if (is.null(txCounts)) {
        stop("'se' must contain an assay named 'counts'", call. = FALSE)
    }
    geneIds <- rowData(se)$GENEID
    if (is.null(geneIds)) {
        stop("rowData of 'se' must contain a 'GENEID' column", call. = FALSE)
    }

    # Unlist GRangesList: one row per exon-transcript combination
    exonRanges <- unlist(rowRanges(se), use.names = TRUE)

    # Build a data.table of exon-transcript pairs
    exonDt <- data.table(
        TXNAME = names(exonRanges),
        seqnames = as.character(seqnames(exonRanges)),
        start = start(exonRanges),
        end = end(exonRanges),
        strand = as.character(strand(exonRanges))
    )

    # Unique exon key: seqnames:start:end:strand
    exonDt[, exon_id := paste(seqnames, start, end, strand, sep = ":")]

    # Attach GENEID from rowData; the join keeps the row order of exonDt
    geneDt <- data.table(TXNAME = txNames, GENEID = geneIds)
    exonDt <- geneDt[exonDt, on = "TXNAME"]

    # Collapse metadata per unique exon (groups appear in first-seen order)
    exonMeta <- exonDt[, .(
        seqnames = seqnames[1],
        start = start[1],
        end = end[1],
        strand = strand[1],
        GENEID = paste(sort(unique(GENEID)), collapse = ",")
    ), by = exon_id]

    # Build sparse binary matrix: unique_exons x transcripts
    # entry [i, j] = 1 if transcript j contains unique exon i
    uniqueExons <- exonMeta$exon_id
    exonIdx <- match(exonDt$exon_id, uniqueExons)
    txIdx <- match(exonDt$TXNAME, txNames)

    exonTxMat <- sparseMatrix(
        i = exonIdx,
        j = txIdx,
        x = 1L,
        dims = c(length(uniqueExons), length(txNames)),
        dimnames = list(uniqueExons, txNames)
    )

    # Aggregate counts: unique_exons x samples
    exonCounts <- exonTxMat %*% txCounts

    # Combine metadata with aggregated counts; rows of exonCounts follow
    # uniqueExons, which is exactly the row order of exonMeta
    result <- cbind(
        exonMeta,
        as.data.table(as.matrix(exonCounts))
    )

    # Sort by seqnames (as character), then start, then end
    result[order(seqnames, start, end)]
}

0 commit comments

Comments
 (0)