Take a user-defined gene vector and dynamically map the input to either the object rownames or the gene names (symbols). These functions are useful for writing code that needs to handle either gene identifier or gene name input dynamically (e.g. for single-cell RNA-seq marker analysis).
Usage
mapGenesToRownames(object, ...)
mapGenesToIds(object, ...)
mapGenesToSymbols(object, ...)
# S4 method for SummarizedExperiment
mapGenesToRownames(object, genes, strict = TRUE)
# S4 method for SummarizedExperiment
mapGenesToIds(object, genes, strict = TRUE)
# S4 method for SummarizedExperiment
mapGenesToSymbols(object, genes, strict = TRUE)
Arguments
- object
Object.
- genes
character. Gene identifiers.
- strict
logical(1). Require all genes to match. Recommended by default. If set to FALSE, a warning is returned to the user instead, and the genes vector is subset to include only the matches.
- ...
Additional arguments.
Ambiguous gene names
Some genomes (e.g. Homo sapiens, Mus musculus) contain duplicated gene names
for multiple gene identifiers. Normally we handle these ambiguous gene names
by sanitizing them with make.names. If a user requests a gene name that
is duplicated, these functions will return a warning.
Examples
data(RangedSummarizedExperiment, package = "AcidTest")
## SummarizedExperiment ====
object <- RangedSummarizedExperiment
rownames <- head(rownames(object))
print(rownames)
#> [1] "gene001" "gene002" "gene003" "gene004" "gene005" "gene006"
g2s <- GeneToSymbol(object)
geneIds <- head(g2s[["geneId"]])
print(geneIds)
#> [1] "ENSG00000000003.15" "ENSG00000000005.6" "ENSG00000000419.12"
#> [4] "ENSG00000000457.14" "ENSG00000000460.17" "ENSG00000000938.13"
geneNames <- head(g2s[["geneName"]])
print(geneNames)
#> [1] "TSPAN6" "TNMD" "DPM1" "SCYL3" "C1orf112" "FGR"
## Row names.
mapGenesToRownames(object, genes = rownames)
#> gene001 gene002 gene003 gene004 gene005 gene006
#> "gene001" "gene002" "gene003" "gene004" "gene005" "gene006"
mapGenesToRownames(object, genes = geneIds)
#> ENSG00000000003.15 ENSG00000000005.6 ENSG00000000419.12 ENSG00000000457.14
#> "gene001" "gene002" "gene003" "gene004"
#> ENSG00000000460.17 ENSG00000000938.13
#> "gene005" "gene006"
mapGenesToRownames(object, genes = geneNames)
#> TSPAN6 TNMD DPM1 SCYL3 C1orf112 FGR
#> "gene001" "gene002" "gene003" "gene004" "gene005" "gene006"
## Gene identifiers.
mapGenesToIds(object, genes = rownames)
#> gene001 gene002 gene003
#> "ENSG00000000003.15" "ENSG00000000005.6" "ENSG00000000419.12"
#> gene004 gene005 gene006
#> "ENSG00000000457.14" "ENSG00000000460.17" "ENSG00000000938.13"
mapGenesToIds(object, genes = geneIds)
#> ENSG00000000003.15 ENSG00000000005.6 ENSG00000000419.12
#> "ENSG00000000003.15" "ENSG00000000005.6" "ENSG00000000419.12"
#> ENSG00000000457.14 ENSG00000000460.17 ENSG00000000938.13
#> "ENSG00000000457.14" "ENSG00000000460.17" "ENSG00000000938.13"
mapGenesToIds(object, genes = geneNames)
#> TSPAN6 TNMD DPM1
#> "ENSG00000000003.15" "ENSG00000000005.6" "ENSG00000000419.12"
#> SCYL3 C1orf112 FGR
#> "ENSG00000000457.14" "ENSG00000000460.17" "ENSG00000000938.13"
## Gene names (symbols).
mapGenesToSymbols(object, genes = rownames)
#> gene001 gene002 gene003 gene004 gene005 gene006
#> "TSPAN6" "TNMD" "DPM1" "SCYL3" "C1orf112" "FGR"
mapGenesToSymbols(object, genes = geneIds)
#> ENSG00000000003.15 ENSG00000000005.6 ENSG00000000419.12 ENSG00000000457.14
#> "TSPAN6" "TNMD" "DPM1" "SCYL3"
#> ENSG00000000460.17 ENSG00000000938.13
#> "C1orf112" "FGR"
mapGenesToSymbols(object, genes = geneNames)
#> TSPAN6 TNMD DPM1 SCYL3 C1orf112 FGR
#> "TSPAN6" "TNMD" "DPM1" "SCYL3" "C1orf112" "FGR"