準(zhǔn)備工作
## ----echo=FALSE, results='hide'
library(DOSE)
library(GO.db)
library(org.Hs.eg.db)
library(GSEABase)
library(clusterProfiler)
這里進(jìn)行包的導(dǎo)入
基因ID類型的轉(zhuǎn)換
bitr轉(zhuǎn)換
x <- c("GPX3", "GLRX", "LBP", "CRYAB", "DEFB1", "HCLS1", "SOD2", "HSPA2",
"ORM1", "IGFBP1", "PTHLH", "GPC3", "IGFBP3","TOB1", "MITF", "NDRG1",
"NR1H4", "FGFR3", "PVR", "IL6", "PTPRM", "ERBB2", "NID2", "LAMB1",
"COMP", "PLS3", "MCAM", "SPP1", "LAMC1", "COL4A2", "COL4A1", "MYOC",
"ANXA4", "TFPI2", "CST6", "SLPI", "TIMP2", "CPM", "GGT1", "NNMT",
"MAL", "EEF1A2", "HGD", "TCN2", "CDA", "PCCA", "CRYM", "PDXK",
"STC1", "WARS", "HMOX1", "FXYD2", "RBP4", "SLC6A12", "KDELR3", "ITM2B")
keytypes(org.Hs.eg.db)
eg = bitr(x, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
ids <- bitr(x, fromType="SYMBOL", toType=c("UNIPROT", "ENSEMBL"), OrgDb="org.Hs.eg.db")
參數(shù):
- x:基因ID向量
- fromType:目前基因ID類型
- toType:目的轉(zhuǎn)換ID類型挪哄,可通過向量模式表示多ID類型
- OrgDb:使用的注釋數(shù)據(jù)包,通過keytypes()函數(shù)查看支持的類型
返回:
- 各類型ID組成的數(shù)據(jù)框
注:
- 對(duì)于groupGO, enrichGO和gseGO函數(shù)六荒,可以通過keyType參數(shù)指定類型说铃,從而省去類型轉(zhuǎn)換的過程(后續(xù))玄柠。
bitr_kegg轉(zhuǎn)換
data(gcSample)
hg <- gcSample[[1]]
search_kegg_organism(“human”,by="common_name")
eg2np <- bitr_kegg(hg, fromType='kegg', toType='ncbi-proteinid', organism='hsa')
參數(shù):
- hg:基因的ID向量
- fromType:目前基因ID類型
- toType:目標(biāo)基因ID類型
- organism:支持的物種類型
返回:
- 各類型ID組成的數(shù)據(jù)框
注:
- 兩個(gè)Type參數(shù)取值必須為以下四種:
kegg:KEGG庫中的ID
ncbi-geneid:NCBI中對(duì)因的基因ID
ncbi-proteinid:NCBI中對(duì)應(yīng)的蛋白ID
uniprot:带射? - organism參數(shù)可以通過search_kegg_organism()函數(shù)獲取取值,常用的“hsa”為人類
GO分析
GO分類
data(geneList, package="DOSE")
gene <- names(geneList)[abs(geneList) > 2]
gene.df <- bitr(gene, fromType = "ENTREZID",
toType = c("ENSEMBL", "SYMBOL"),
OrgDb = org.Hs.eg.db)
ggo <- groupGO(gene = gene,
OrgDb = org.Hs.eg.db,
ont = "CC",
level = 3,
readable = TRUE)
參數(shù):
- gene:基因ID組成的向量
- OrgDb:注釋數(shù)據(jù)庫
- ont:GO描述功能的方式
- lebel:GO術(shù)語的水平
- readable:T時(shí)争剿,以symbols形式輸出
返回:
- 數(shù)據(jù)框
- count:基因數(shù)目
- GeneRatio:基因的比例
GO過表達(dá)測(cè)試
ego <- enrichGO(gene = gene,
universe = names(geneList),
OrgDb = org.Hs.eg.db,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
參數(shù):
- gene:基因向量
- universe:背景基因,默認(rèn)為GO數(shù)據(jù)庫痊末,給定后也是和GO取交集
- OrgDb:數(shù)據(jù)庫
- ont:GO屬于方面
- pAdjustMethod:p值矯正反法
- pcalueCutoff:p值閾值
- qvalueCutoff:q值閾值
- readable:是否使用smybols形式輸出
返回:
- 數(shù)據(jù)框
- GeneRatio:樣本基因中對(duì)因功能基因占樣本基因總體的比例(總體是與對(duì)應(yīng)庫取交集的結(jié)果數(shù)目)
- BgRatio:數(shù)據(jù)庫中對(duì)因功能基因占數(shù)據(jù)庫基因總體的比例(總體是與對(duì)應(yīng)庫取交集的結(jié)果數(shù)目)
- count:基因數(shù)目
注:
- dropGO()蚕苇,simplify()丟掉想去掉的GO
- gofilter()將GO分析限制在具體的level
- 可以添加keyType參數(shù),不建議使用凿叠,有的類型會(huì)有重復(fù)
GO基因集富集分析
ego3 <- gseGO(geneList = geneList,
OrgDb = org.Hs.eg.db,
ont = "CC",
nPerm = 1000,
minGSSize = 100,
maxGSSize = 500,
pvalueCutoff = 0.05,
verbose = FALSE)
參數(shù):
- geneList:進(jìn)行富集的基因列表
- OrgDb:注釋數(shù)據(jù)庫
- ont:GO術(shù)語方面
- nPerm:置換的數(shù)目
- minGSSize:用于測(cè)試的功能集最小容量
- maxGSSize:用于測(cè)試的功能集最大容量
- pvalueCutoff:p值閾值
- verbose:是否打印信息
返回:
- 數(shù)據(jù)框
- setSize:基因集的基因數(shù)目
- enrichmentScore:ES分?jǐn)?shù)
- NES:標(biāo)準(zhǔn)化后的ES分?jǐn)?shù)
- rank:涩笤?
- leading_edge:前沿集
- core_enrichment:富集的主要基因
KEGG分析
KEGG過表達(dá)分析
kk <- enrichKEGG(gene = gene,
organism = 'hsa',
pvalueCutoff = 0.05)
KEGG基因集富集分析
kk2 <- gseKEGG(geneList = geneList,
organism = 'hsa',
nPerm = 1000,
minGSSize = 120,
pvalueCutoff = 0.05,
verbose = FALSE)
KEGG模塊過表達(dá)測(cè)試
mkk <- enrichMKEGG(gene = gene,
organism = 'hsa')
KEGG模塊基因集富集分析
mkk2 <- gseMKEGG(geneList = geneList,
mkk2 <- gseMKEGG(geneList = mydata,
organism = 'hsa')
疾病分析
可視化
就是各種函數(shù)
barplot(ggo, drop=TRUE, showCategory=12)
dotplot(ego)
emapplot(ego)
cnetplot(ego, categorySize="pvalue", foldChange=geneList)
goplot(ego)
gseaplot(kk2, geneSetID = "hsa04145")
browseKEGG(kk, 'hsa04110')
library("pathview")
hsa04110 <- pathview(gene.data = geneList,
pathway.id = "hsa04110",
species = "hsa",
limit = list(gene=max(abs(geneList)), cpd=1))
多基因集功能對(duì)比
ck <- compareCluster(geneCluster = gcSample, fun = "enrichKEGG")
參數(shù):
- geneCluster:帶有命名的列表
- fun:方法取值可以是groupGO,enrichGO盒件,enrichKEGG蹬碧,enrichDO或者enrichPathway
分類交叉(公式接口)
mydf <- data.frame(Entrez=names(geneList), FC=geneList)
mydf <- mydf[abs(mydf$FC) > 1,]
mydf$group <- "upregulated"
mydf$group[mydf$FC < 0] <- "downregulated"
mydf$othergroup <- "A"
mydf$othergroup[abs(mydf$FC) > 2] <- "B"
formula_res <- compareCluster(Entrez~group+othergroup, data=mydf, fun="enrichKEGG")
參數(shù):
-
:通過公式指定分類組合方法
- data:進(jìn)行分析的數(shù)據(jù)
- fun:采取的分析方法
功能對(duì)比的可視化
dotplot(ck)
dotplot(formula_res)
參數(shù):
- showCategory:每個(gè)基因集繪制其多少功能類
- by:點(diǎn)大小的含義,可以設(shè)置為geneRatio炒刁,count恩沽,rowPercentage
注:
- 點(diǎn)的顏色代表p值,越紅證明越富集
參考資料:Guangchuang Yu.2018."Statistical analysis and visualization of functional profiles for genes and gene clusters".http://www.bioconductor.org/packages/release/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html#go-analysis