R/makeTx2GeneFromFASTA.R
makeTx2GeneFromFASTA.Rd
Make a Tx2Gene object from transcriptome FASTA
makeTx2GeneFromFASTA(file)
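| Argument | Description |
|---|---|
| file | Transcriptome FASTA file to parse. The example below passes an FTP URL to a gzipped FASTA file. |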
Returns a `Tx2Gene` object.
RefSeq transcript FASTA files (e.g. "GCF_000001405.39_GRCh38.p13_rna.fna.gz") don't contain gene identifiers and are not supported.
Updated 2021-01-29.
## Ensembl ====
file <- pasteURL(
    "ftp.ensembl.org",
    "pub",
    "release-102",
    "fasta",
    "homo_sapiens",
    "cdna",
    "Homo_sapiens.GRCh38.cdna.all.fa.gz",
    protocol = "ftp"
)
t2g <- makeTx2GeneFromFASTA(file)
#>
#> 129e5a395880_Homo_sapiens.GRCh38.cdna.all.fa.gz at /opt/koopa/opt/r/cache/AcidGenomes using data.table::`fread()`.
#> ℹ Ensembl transcriptome detected.
#> Tx2Gene with 194360 rows and 2 columns
#>                      txId             geneId
#>               <character>        <character>
#> 1      ENST00000000233.10 ENSG00000004059.11
#> 2       ENST00000000412.8  ENSG00000003056.8
#> 3      ENST00000000442.11 ENSG00000173153.17
#> 4       ENST00000001008.6  ENSG00000004478.8
#> 5       ENST00000001146.6  ENSG00000003137.8
#> ...                   ...                ...
#> 194356  ENST00000679345.1 ENSG00000148334.16
#> 194357  ENST00000679346.1 ENSG00000182481.10
#> 194358  ENST00000679347.1 ENSG00000174080.12
#> 194359  ENST00000679348.1 ENSG00000115053.17
#> 194360  ENST00000679349.1 ENSG00000099284.15

## GENCODE ====
## > file <- pasteURL(
## >     "ftp.ebi.ac.uk",
## >     "pub",
## >     "databases",
## >     "gencode",
## >     "Gencode_human",
## >     "release_32",
## >     "gencode.v32.transcripts.fa.gz",
## >     protocol = "ftp"
## > )
## > t2g <- makeTx2GeneFromFASTA(file)
## > print(t2g)

## FlyBase ====
## > file <- pasteURL(
## >     "ftp.flybase.net",
## >     "releases",
## >     "FB2019_05",
## >     "dmel_r6.30",
## >     "fasta",
## >     "dmel-all-transcript-r6.30.fasta.gz",
## >     protocol = "ftp"
## > )
## > t2g <- makeTx2GeneFromFASTA(file)
## > print(t2g)

## WormBase ====
## > file <- pasteURL(
## >     "ftp.wormbase.org",
## >     "pub",
## >     "wormbase",
## >     "releases",
## >     "WS272",
## >     "species",
## >     "c_elegans",
## >     "PRJNA13758",
## >     "c_elegans.PRJNA13758.WS272.mRNA_transcripts.fa.gz",
## >     protocol = "ftp"
## > )
## > t2g <- makeTx2GeneFromFASTA(file)
## > print(t2g)