歡迎關(guān)注Bioinfor 生信云澎埠!
這一期我們來(lái)講講富集分析。
構(gòu)建OrgDB
OrgDB是bioconductor中存儲(chǔ)不同數(shù)據(jù)庫(kù)基因ID之間的對(duì)應(yīng)關(guān)系始藕,以及基因與GO等注釋的對(duì)應(yīng)關(guān)系的R軟件包蒲稳。共有19個(gè)物種,可以在這里下載
沒(méi)有OrgDB的物種可以通過(guò)AnnotationForge自己構(gòu)建
使用eggnog-mapper的注釋結(jié)果構(gòu)建OrgDB(私聊我獲取構(gòu)建OrgDB腳本)
#安裝自己構(gòu)建的OrgDB
dir.create('R_lib', recursive = T)
install.packages('org.My.eg.db_1.0.tar.gz',
repos = NULL,
lib = 'R_lib')
library(org.My.eg.db, lib = 'R_lib')
GO富集(自建OrgDB的物種)
gene <- filter(de_result,
abs(log2FoldChange) > 1 & padj < 0.05) %>%
pull(id)
geneList <- de_result$log2FoldChange
names(geneList) <- de_result$id
geneList <- sort(geneList, decreasing = T)
de_go <- enrichGO(gene = gene,
OrgDb = org.My.eg.db,
keyType = 'GID',
ont = 'ALL',
qvalueCutoff = 0.05,
pvalueCutoff = 0.05)
de_go_df <- as.data.frame(de_go)
barplot(de_ego, showCategory = 10)
dotplot(de_ego, showCategory = 10)
cnetplot(de_ego,
foldChange = geneList,
showCategory = 5,
node_label = "all", # category | gene | all | none
circular = TRUE,
colorEdge = TRUE)
GO富集(已有OrgDB的物種伍派,擬南芥為例)
library(org.At.tair.db)
de_go <- enrichGO(gene = gene,
OrgDb = org.At.tair.db,
keyType = 'TAIR',
ont = 'ALL',
qvalueCutoff = 0.05,
pvalueCutoff = 0.05)
barplot(de_go, showCategory = 10)
dotplot(de_go, showCategory = 10)
test <- pairwise_termsim(de_go)
p1 <- treeplot(test, showCategory = 30)
p2 <- treeplot(test, hclust_method = "average")
plot_grid(p1, p2, lables = c('A', 'B'), nrow = 2)
emapplot(test)
emapplot_cluster(test)
KEGG富集分析(不常見(jiàn)物種)
emapper <- read_delim('query_seqs.fa.emapper.annotations',
"\t", escape_double = FALSE, col_names = FALSE,
comment = "#", trim_ws = TRUE) %>%
dplyr::select(GID = X1,
KO = X9,
Pathway = X10)
pathway2gene <- dplyr::select(emapper, Pathway, GID) %>%
separate_rows(Pathway, sep = ',', convert = F) %>%
filter(str_detect(Pathway, 'ko')) %>%
mutate(Pathway = str_remove(Pathway, 'ko'))
library(magrittr)
get_path2name <- function(){
keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
colnames(keggpathid2name.df) <- c("path_id","path_name")
return(keggpathid2name.df)
}
pathway2name <- get_path2name()
ibrary(clusterProfiler)
de_kegg <- enricher(gene,
TERM2GENE = pathway2gene,
TERM2NAME = pathway2name,
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
de_kegg_df <- as.data.frame(de_kegg)
barplot(de_kegg, showCategory = 10)
dotplot(de_kegg, showCategory = 10)
cnetplot(de_kegg,
foldChange = geneList,
showCategory = 3,
node_label = "category", # category | gene | all | none
circular = TRUE,
colorEdge = TRUE)
KEGG富集分析(常見(jiàn)物種,擬南芥為例)
library(tidyverse)
library(clusterProfiler)
de_kegg <- enrichKEGG(gene,
keyType = 'kegg',
organism = 'ath',
pvalueCutoff = 0.05,
pAdjustMethod = 'BH')
barplot(de_kegg, showCategory = 10)
dotplot(de_kegg, showCategory = 10)
cnetplot(de_kegg,
foldChange = geneList,
showCategory = 3,
node_label = "category", # category | gene | all | none
circular = TRUE,
colorEdge = TRUE)