Skip to contents

Make a TxToGene object from transcriptome FASTA

Usage

makeTxToGeneFromFasta(file, ignoreVersion = FALSE)

Arguments

file

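character(1). File path or URL. Remote files (e.g. an FTP URL, as in the Ensembl example below) are downloaded and cached locally before import.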

ignoreVersion

logical(1). Ignore identifier (e.g. transcript, gene) versions. When applicable, identifiers containing version numbers will be stored in txIdVersion and geneIdVersion, and the variants without versions will be stored in txId, txIdNoVersion, geneId, and geneIdNoVersion.

Value

TxToGene.

Details

RefSeq transcript FASTA files (e.g. "GCF_000001405.39_GRCh38.p13_rna.fna.gz") don't contain gene identifiers and are therefore not supported.

Note

Updated 2023-11-28.

Examples

## Ensembl ====
file <- AcidBase::pasteUrl(
    "ftp.ensembl.org",
    "pub",
    "release-102",
    "fasta",
    "homo_sapiens",
    "cdna",
    "Homo_sapiens.GRCh38.cdna.all.fa.gz",
    protocol = "ftp"
)
t2g <- makeTxToGeneFromFasta(file)
#> → Making <TxToGene> from FASTA file (ftp://ftp.ensembl.org/pub/release-102/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz).
#> → Importing /Users/mike/.cache/R/AcidGenomes/BiocFileCache/89e57fee89c8_Homo_sapiens.GRCh38.cdna.all.fa.gz using base::`readLines()`.
#> ℹ Ensembl transcriptome detected.
print(t2g)
#> TxToGene with 194360 rows and 2 columns
#>                      txId             geneId
#>               <character>        <character>
#> 1      ENST00000000233.10 ENSG00000004059.11
#> 2       ENST00000000412.8  ENSG00000003056.8
#> 3      ENST00000000442.11 ENSG00000173153.17
#> 4       ENST00000001008.6  ENSG00000004478.8
#> 5       ENST00000001146.6  ENSG00000003137.8
#> ...                   ...                ...
#> 194356  ENST00000679345.1 ENSG00000148334.16
#> 194357  ENST00000679346.1 ENSG00000182481.10
#> 194358  ENST00000679347.1 ENSG00000174080.12
#> 194359  ENST00000679348.1 ENSG00000115053.17
#> 194360  ENST00000679349.1 ENSG00000099284.15

## GENCODE ====
## GRCh38:
## > file <- AcidBase::pasteUrl(
## >     "ftp.ebi.ac.uk",
## >     "pub",
## >     "databases",
## >     "gencode",
## >     "Gencode_human",
## >     "release_32",
## >     "gencode.v32.transcripts.fa.gz",
## >     protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)
##
## GRCh37:
## > file <- AcidBase::pasteUrl(
## >     "ftp.ebi.ac.uk",
## >     "pub",
## >     "databases",
## >     "gencode",
## >     "Gencode_human",
## >     "release_44",
## >     "GRCh37_mapping",
## >     "gencode.v44lift37.transcripts.fa.gz",
## >     protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)

## FlyBase ====
## > file <- AcidBase::pasteUrl(
## >     "ftp.flybase.net",
## >     "releases",
## >     "FB2019_05",
## >     "dmel_r6.30",
## >     "fasta",
## >     "dmel-all-transcript-r6.30.fasta.gz",
## >     protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)

## WormBase ====
## > file <- AcidBase::pasteUrl(
## >     "ftp.wormbase.org",
## >     "pub",
## >     "wormbase",
## >     "releases",
## >     "WS272",
## >     "species",
## >     "c_elegans",
## >     "PRJNA13758",
## >     "c_elegans.PRJNA13758.WS272.mRNA_transcripts.fa.gz",
## >     protocol = "ftp"
## > )
## > t2g <- makeTxToGeneFromFasta(file)
## > print(t2g)