Make a TxToGene object from transcriptome FASTA
Source:R/makeTxToGeneFromFasta.R
makeTxToGeneFromFasta.Rd
Make a TxToGene object from transcriptome FASTA
Arguments
- file
character(1). File path.
- ignoreVersion
logical(1). Ignore identifier (e.g. transcript, gene) versions. When applicable, the identifiers containing version numbers will be stored in txIdVersion and geneIdVersion, and the variants without versions will be stored in txId, txIdNoVersion, geneId, and geneIdNoVersion.
Details
RefSeq transcript FASTA (e.g. "GCF_000001405.39_GRCh38.p13_rna.fna.gz") doesn't contain gene identifiers, and is not supported.
Examples
## Ensembl ====
file <- AcidBase::pasteUrl(
"ftp.ensembl.org",
"pub",
"release-102",
"fasta",
"homo_sapiens",
"cdna",
"Homo_sapiens.GRCh38.cdna.all.fa.gz",
protocol = "ftp"
)
t2g <- makeTxToGeneFromFasta(file)
#> → Making <TxToGene> from FASTA file (ftp://ftp.ensembl.org/pub/release-102/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz).
#> → Importing /Users/mike/.cache/R/AcidGenomes/BiocFileCache/107071d2b20f4_Homo_sapiens.GRCh38.cdna.all.fa.gz using base::`readLines()`.
#> ℹ Ensembl transcriptome detected.
print(t2g)
#> TxToGene with 194360 rows and 2 columns
#> txId geneId
#> <character> <character>
#> 1 ENST00000000233.10 ENSG00000004059.11
#> 2 ENST00000000412.8 ENSG00000003056.8
#> 3 ENST00000000442.11 ENSG00000173153.17
#> 4 ENST00000001008.6 ENSG00000004478.8
#> 5 ENST00000001146.6 ENSG00000003137.8
#> ... ... ...
#> 194356 ENST00000679345.1 ENSG00000148334.16
#> 194357 ENST00000679346.1 ENSG00000182481.10
#> 194358 ENST00000679347.1 ENSG00000174080.12
#> 194359 ENST00000679348.1 ENSG00000115053.17
#> 194360 ENST00000679349.1 ENSG00000099284.15
## GENCODE ====
## GRCh38:
## > file <- AcidBase::pasteUrl(
## > "ftp.ebi.ac.uk",
## > "pub",
## > "databases",
## > "gencode",
## > "Gencode_human",
## > "release_32",
## > "gencode.v32.transcripts.fa.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)
##
## GRCh37:
## > file <- AcidBase::pasteUrl(
## > "ftp.ebi.ac.uk",
## > "pub",
## > "databases",
## > "gencode",
## > "Gencode_human",
## > "release_44",
## > "GRCh37_mapping",
## > "gencode.v44lift37.transcripts.fa.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)
## FlyBase ====
## > file <- AcidBase::pasteUrl(
## > "ftp.flybase.net",
## > "releases",
## > "FB2019_05",
## > "dmel_r6.30",
## > "fasta",
## > "dmel-all-transcript-r6.30.fasta.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)
## WormBase ====
## > file <- AcidBase::pasteUrl(
## > "ftp.wormbase.org",
## > "pub",
## > "wormbase",
## > "releases",
## > "WS272",
## > "species",
## > "c_elegans",
## > "PRJNA13758",
## > "c_elegans.PRJNA13758.WS272.mRNA_transcripts.fa.gz",
## > protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)