Import NCBI (Entrez) gene identifier information
Arguments
- organism: `character(1)`. Full Latin organism name (e.g. "Homo sapiens").
- taxonomicGroup: `character(1)`. NCBI FTP server taxonomic group subdirectory path (e.g. "Mammalia"). Defining this manually avoids having to query the FTP server.
- cache: `logical(1)`. Cache the gene info file from the NCBI FTP server using BiocFileCache.
Examples
object <- NcbiGeneInfo(
organism = "Homo sapiens",
taxonomicGroup = "Mammalia"
)
#> → Downloading Homo sapiens gene info from NCBI at <https://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz>.
#> → Importing /Users/mike/.cache/R/AcidGenomes/BiocFileCache/10707413a06ea_Homo_sapiens.gene_info.gz using base::`read.table()`.
print(object)
#> NcbiGeneInfo with 193501 rows and 13 columns
#> chromosome
#> <Rle>
#> 1 19
#> 2 12
#> 3 12
#> 9 8
#> 10 8
#> ... ...
#> 8923215 MT
#> 8923216 MT
#> 8923217 MT
#> 8923218 MT
#> 8923219 MT
#> dbXrefs
#> <CharacterList>
#> 1 AllianceGenome:HGNC:5,Ensembl:ENSG00000121..,HGNC:HGNC:5,...
#> 2 AllianceGenome:HGNC:7,Ensembl:ENSG00000175..,HGNC:HGNC:7,...
#> 3 AllianceGenome:HGNC:8,Ensembl:ENSG00000291..,HGNC:HGNC:8
#> 9 AllianceGenome:HGNC:..,Ensembl:ENSG00000171..,HGNC:HGNC:7645,...
#> 10 AllianceGenome:HGNC:..,Ensembl:ENSG00000156..,HGNC:HGNC:7646,...
#> ... ...
#> 8923215
#> 8923216
#> 8923217
#> 8923218
#> 8923219
#> description featureType geneId geneName
#> <Rle> <Rle> <Rle> <Rle>
#> 1 alpha-1-B glycoprotein NA 1 A1BG
#> 2 alpha-2-macroglobulin NA 2 A2M
#> 3 alpha-2-macroglobuli.. NA 3 A2MP1
#> 9 N-acetyltransferase 1 NA 9 NAT1
#> 10 N-acetyltransferase 2 NA 10 NAT2
#> ... ... ... ... ...
#> 8923215 tRNA-Asp NA 8923215 trnD
#> 8923216 tRNA-Pro NA 8923216 trnP
#> 8923217 tRNA-Ala NA 8923217 trnA
#> 8923218 cytochrome c oxidase.. NA 8923218 COX1
#> 8923219 l-rRNA NA 8923219 16S rRNA
#> geneSynonyms mapLocation modificationDate nomenclatureStatus
#> <CharacterList> <Rle> <Date> <Rle>
#> 1 A1B,ABG,GAB,... 19q13.43 2025-03-08 O
#> 2 A2MD,CPAMD5,FWP007,... 12p13.31 2025-03-08 O
#> 3 A2MP 12p13.31 2025-03-04 O
#> 9 AAC1,MNAT,NAT-1,... 8p22 2025-03-08 O
#> 10 AAC2,NAT-2,PNAT 8p22 2025-03-08 O
#> ... ... ... ... ...
#> 8923215 NA 2020-09-09 NA
#> 8923216 NA 2020-09-09 NA
#> 8923217 NA 2020-09-09 NA
#> 8923218 NA 2023-08-18 NA
#> 8923219 NA 2020-09-09 NA
#> otherDesignations
#> <CharacterList>
#> 1 HEL-S-163pA,alpha-1B-glycoprotein,epididymis secretory..
#> 2 C3 and PZP-like alph..,alpha-2-M,alpha-2-macroglobulin
#> 3 pregnancy-zone prote..
#> 9 N-acetyltransferase ..,N-acetyltransferase ..,arylamide acetylase 1,...
#> 10 N-acetyltransferase ..,N-acetyltransferase ..,N-hydroxyarylamine O..,...
#> ... ...
#> 8923215
#> 8923216
#> 8923217
#> 8923218 cytochrome c oxidase..
#> 8923219
#> taxonomyId typeOfGene
#> <Rle> <Rle>
#> 1 9606 protein-coding
#> 2 9606 protein-coding
#> 3 9606 pseudo
#> 9 9606 protein-coding
#> 10 9606 protein-coding
#> ... ... ...
#> 8923215 741158 tRNA
#> 8923216 741158 tRNA
#> 8923217 741158 tRNA
#> 8923218 741158 protein-coding
#> 8923219 741158 rRNA