Make a TxToGene object from transcriptome FASTA
Source: R/makeTxToGeneFromFasta.R
makeTxToGeneFromFasta.Rd
Make a TxToGene object from transcriptome FASTA
Arguments
- file
`character(1)`. File path.
- ignoreVersion
`logical(1)`. Ignore identifier (e.g. transcript, gene) versions. When applicable, the identifiers containing version numbers will be stored in `txIdVersion` and `geneIdVersion`, and the variants without versions will be stored in `txId`, `txIdNoVersion`, `geneId`, and `geneIdNoVersion`.
Details
RefSeq transcript FASTA files (e.g. "GCF_000001405.39_GRCh38.p13_rna.fna.gz") do not contain gene identifiers and are therefore not supported.
Examples
## Ensembl ====
file <- AcidBase::pasteUrl(
"ftp.ensembl.org",
"pub",
"release-102",
"fasta",
"homo_sapiens",
"cdna",
"Homo_sapiens.GRCh38.cdna.all.fa.gz",
protocol = "ftp"
)
t2g <- makeTxToGeneFromFasta(file)
#> → Making <TxToGene> from FASTA file (ftp://ftp.ensembl.org/pub/release-102/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz).
#> → Importing /Users/mike/.cache/R/AcidGenomes/BiocFileCache/89e57fee89c8_Homo_sapiens.GRCh38.cdna.all.fa.gz using base::`readLines()`.
#> ℹ Ensembl transcriptome detected.
print(t2g)
#> TxToGene with 194360 rows and 2 columns
#> txId geneId
#> <character> <character>
#> 1 ENST00000000233.10 ENSG00000004059.11
#> 2 ENST00000000412.8 ENSG00000003056.8
#> 3 ENST00000000442.11 ENSG00000173153.17
#> 4 ENST00000001008.6 ENSG00000004478.8
#> 5 ENST00000001146.6 ENSG00000003137.8
#> ... ... ...
#> 194356 ENST00000679345.1 ENSG00000148334.16
#> 194357 ENST00000679346.1 ENSG00000182481.10
#> 194358 ENST00000679347.1 ENSG00000174080.12
#> 194359 ENST00000679348.1 ENSG00000115053.17
#> 194360 ENST00000679349.1 ENSG00000099284.15
## GENCODE ====
## GRCh38:
## > file <- AcidBase::pasteUrl(
## > "ftp.ebi.ac.uk",
## > "pub",
## > "databases",
## > "gencode",
## > "Gencode_human",
## > "release_32",
## > "gencode.v32.transcripts.fa.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)
##
## GRCh37:
## > file <- AcidBase::pasteUrl(
## > "ftp.ebi.ac.uk",
## > "pub",
## > "databases",
## > "gencode",
## > "Gencode_human",
## > "release_44",
## > "GRCh37_mapping",
## > "gencode.v44lift37.transcripts.fa.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)
## FlyBase ====
## > file <- AcidBase::pasteUrl(
## > "ftp.flybase.net",
## > "releases",
## > "FB2019_05",
## > "dmel_r6.30",
## > "fasta",
## > "dmel-all-transcript-r6.30.fasta.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)
## WormBase ====
## > file <- AcidBase::pasteUrl(
## > "ftp.wormbase.org",
## > "pub",
## > "wormbase",
## > "releases",
## > "WS272",
## > "species",
## > "c_elegans",
## > "PRJNA13758",
## > "c_elegans.PRJNA13758.WS272.mRNA_transcripts.fa.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)