- Ensembl、Entrez ID、Symbol 這三種基因名格式的互相轉換
# Example input: 10 human genes given as SYMBOL-format names.
gene_symbol <- c(
  "RHO", "CALM1", "MEG3", "GNGT1", "SAG",
  "RPGRIP1", "TRPM1", "PCP2", "PCP4", "AP1B1"
)
法1:org.Hs.eg.db包
# Load the human annotation database and list the key types it supports.
library(org.Hs.eg.db)
keytypes(org.Hs.eg.db)

# Look up the ENSEMBL and ENTREZID identifiers for each input gene symbol.
gene_ids <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = as.character(gene_symbol),
  columns = c("ENSEMBL", "ENTREZID"),  # identifier formats to retrieve
  keytype = "SYMBOL"                   # format of the supplied keys
)
gene_ids

# The mouse annotation database is loaded and inspected the same way.
library(org.Mm.eg.db)
keytypes(org.Mm.eg.db)
法2:biomaRt包
library("biomaRt")

# Connect to the Ensembl mart using the human gene dataset.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Preview the first five attributes that can be requested from this dataset.
attributes <- listAttributes(ensembl)
attributes[1:5, ]

# Disabled workaround: switches off SSL peer verification for httr requests.
# library(httr)
# httr::set_config(config(ssl_verifypeer = 0L))

# Query HGNC symbol, Ensembl gene ID and Entrez gene ID for the input symbols.
gene_ids2 <- getBM(
  attributes = c("hgnc_symbol", "ensembl_gene_id", "entrezgene_id"),
  filters = "hgnc_symbol",
  values = gene_symbol,
  mart = ensembl
)
gene_ids2
- 將老鼠基因轉為人類基因名
# Example mouse gene symbols to be converted to human gene names.
musGenes <- c(
  "Hmmr",
  "Tlx3",
  "Cpeb4"
)
法1:直接轉換
對於 SYMBOL 基因名格式,一般老鼠基因與人類基因就是大小寫字母的區別。
人類基因名全部是大寫,而老鼠基因名只有第一個字母是大寫,其餘為小寫。
# Upper-case every mouse symbol to obtain the corresponding human-style name.
toupper(x = musGenes)
# Expected output:
# [1] "HMMR" "TLX3" "CPEB4"
但也有例外,所以嚴謹點,可以使用下面的方法。
法2:biomaRt包
# Map mouse gene symbols (MGI) to human gene symbols (HGNC) with biomaRt.
# library() is used instead of require(): a missing package then stops the
# script here instead of returning FALSE and failing later at useMart().
library("biomaRt")

# One mart connection per species dataset.
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")

# NOTE(review): linked-dataset queries have been reported to fail against some
# Ensembl releases; pinning an archive host in useMart() may be needed - verify.
genes <- getLDS(
  attributes = c("mgi_symbol"),
  filters = "mgi_symbol",
  values = musGenes,
  mart = mouse,
  attributesL = c("hgnc_symbol"),
  martL = human,
  uniqueRows = TRUE  # spelled out: `T` is an ordinary variable and can be reassigned
)

# Display the mapping, as the other lookups in this script do.
genes
# Expected output:
#   MGI.symbol HGNC.symbol
# 1      Cpeb4       CPEB4
# 2       Hmmr        HMMR
# 3       Tlx3        TLX3