單細胞富集分析系列：
單細胞富集分析我最常用的是分組 GSVA，但最近用到了 GO 分析，就複習一下 GO 和 KEGG 富集分析及繪圖。
1. 數據集準備
library(Seurat)
library(patchwork)
library(clusterProfiler)
library(org.Mm.eg.db) ##加載小鼠
library(org.Hs.eg.db) ##加載人類
library(tidyverse)
載入無比熟悉的 pbmc3k 數據集（已注釋好，數據準備見 monocle）
# Load the pre-annotated object saved as "pbmc.rds"; the rest of the script
# treats it as a Seurat object with a `cell_type` metadata column.
pbmc <- readRDS(file = "pbmc.rds")

# Count the cells in each annotated cell type.
table(pbmc$cell_type)
# Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T FCGR3A+ Mono
# 711 480 472 344 279 162
# NK DC Platelet
# 144 32 14
pbmc3k 數據集只有 1 個樣本，沒辦法區分 HC（健康對照）和病例組。
若有分組，可以使用 subset 函數將某種細胞取出，來做這種細胞病例組和對照組相比的差異基因和富集分析。
2. 計算差異基因
- 使用 Seurat 包的 `FindMarkers` 函數來計算差異基因。
ident.1 是病例組，ident.2 是對照組。（這裡只做演示，計算的是和 Naive CD4 T 相比，Memory CD4 T 的差異基因）
# Differential expression: 'Memory CD4 T' (ident.1) versus 'Naive CD4 T'
# (ident.2), with cells grouped by the `cell_type` metadata column.
# Both pre-filters are set to 0 so that no gene is dropped before testing.
dge.celltype <- FindMarkers(
  object = pbmc,
  group.by = 'cell_type',
  ident.1 = 'Memory CD4 T',
  ident.2 = 'Naive CD4 T',
  min.pct = 0,
  logfc.threshold = 0
)
saveRDS(dge.celltype, file = "deg.rds")

# All significant genes: adjusted p-value < 0.05 and |log2 fold change| > 0.15.
# which() drops NA comparisons, matching what subset() does.
is_sig <- with(dge.celltype, p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)
sig_dge.all <- dge.celltype[which(is_sig), , drop = FALSE]
View(sig_dge.all)
- 分組可視化
# Select the genes to display: the 30 most strongly up-regulated and the 30
# most strongly down-regulated significant genes
# (adjusted p-value < 0.05, |log2 fold change| > 0.15).
sig_dge.up <- subset(dge.celltype, p_val_adj < 0.05 & avg_log2FC > 0.15)
sig_dge.up <- sig_dge.up[order(sig_dge.up$avg_log2FC, decreasing = TRUE), ]
# head() instead of [1:30, ]: with fewer than 30 hits, [1:30, ] pads the
# result with NA rows and therefore NA gene names.
sig_dge.up_TOP30 <- rownames(head(sig_dge.up, 30))

sig_dge.down <- subset(dge.celltype, p_val_adj < 0.05 & avg_log2FC < -0.15)
# Ascending order puts the most negative fold changes first, so the first 30
# rows are the most strongly down-regulated genes (descending order would
# select the weakest ones instead).
sig_dge.down <- sig_dge.down[order(sig_dge.down$avg_log2FC, decreasing = FALSE), ]
sig_dge.down_TOP30 <- rownames(head(sig_dge.down, 30))
diffall <- c(sig_dge.up_TOP30, sig_dge.down_TOP30)

# Keep only the two cell types being compared.
Idents(pbmc) <- 'cell_type'
pbmc_sub <- subset(pbmc, idents = c('Memory CD4 T', 'Naive CD4 T'))
Idents(pbmc_sub) <- 'cell_type'
View(pbmc_sub)

# Average the scaled expression per identity class, then keep the selected genes.
matrix <- AverageExpression(object = pbmc_sub, assays = 'RNA', slot = "scale.data")[[1]]
# drop = FALSE keeps a matrix even if only a single gene matches.
matrix <- matrix[rownames(matrix) %in% diffall, , drop = FALSE]
# Clamp values to [-2, 2] so a few extreme genes do not dominate the colour scale.
matrix[matrix > 2] <- 2
matrix[matrix < -2] <- -2

# pheatmap is called through its namespace because the script never attaches
# the package with library(pheatmap).
p <- pheatmap::pheatmap(
  matrix,
  show_colnames = TRUE,
  show_rownames = TRUE,
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = NA,
  color = colorRampPalette(c("navy", "white", "firebrick3"))(50)
)
# Write a heatmap object to a PDF file.
#
# x        : object whose `gtable` element is a grid grob to draw
#            (e.g. the value returned by pheatmap()).
# filename : path of the PDF file to create.
# width    : page width passed to pdf() (default 8).
# height   : page height passed to pdf() (default 15).
#
# Returns the value of dev.off(), as before.
save_pheatmap_pdf <- function(x, filename, width = 8, height = 15) {
  stopifnot(!missing(x))
  stopifnot(!missing(filename))
  grDevices::pdf(filename, width = width, height = height)
  # Close the PDF device even if drawing fails; otherwise the device stays
  # open and later plots are silently written into this file.
  device_open <- TRUE
  on.exit(if (device_open) grDevices::dev.off(), add = TRUE)
  grid::grid.newpage()
  grid::grid.draw(x$gtable)
  device_open <- FALSE
  grDevices::dev.off()
}
# Save the heatmap object `p` using the function's default page size.
save_pheatmap_pdf(x = p, filename = "diff_heatmap.pdf")
3. GO富集分析(分為BP, CC和MF)
# Enrich BP, CC and MF together in a single call.
sig_genes <- row.names(sig_dge.all)
ego_ALL <- enrichGO(
  gene = sig_genes,
  # universe = row.names(dge.celltype),
  keyType = 'SYMBOL',
  OrgDb = 'org.Hs.eg.db',
  ont = "ALL", # "ALL" computes BP, CC and MF in one run
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05,
  pAdjustMethod = "BH"
)

# Flatten the result to a data frame, export it, and open it in the viewer.
ego_all <- data.frame(ego_ALL)
write.csv(ego_ALL, 'enrichGO_all.csv')
View(ego_all)
# Enrich BP, CC and MF separately.
# The three runs differ only in the ontology, so the shared settings live in
# one local helper.
run_go_enrichment <- function(genes, ontology) {
  enrichGO(
    gene = genes,
    # universe = row.names(dge.celltype),
    OrgDb = 'org.Hs.eg.db',
    keyType = 'SYMBOL',
    ont = ontology,
    pAdjustMethod = "BH",
    pvalueCutoff = 0.01,
    qvalueCutoff = 0.05
  )
}

# Cellular component
ego_CC <- run_go_enrichment(row.names(sig_dge.all), "CC")
ego_cc <- data.frame(ego_CC)
write.csv(ego_cc, 'enrichGO_cc.csv')

# Molecular function
ego_MF <- run_go_enrichment(row.names(sig_dge.all), "MF")
ego_mf <- data.frame(ego_MF)
write.csv(ego_mf, 'enrichGO_mf.csv')

# Biological process
ego_BP <- run_go_enrichment(row.names(sig_dge.all), "BP")
ego_bp <- data.frame(ego_BP)
write.csv(ego_bp, 'enrichGO_bp.csv')
繪圖
- 最普通的圖，也是一般生信公司出報告的圖，略醜。
# One bar chart per ontology, each showing 10 categories.
p_BP <- barplot(ego_BP, showCategory = 10) +
  ggtitle("barplot for Biological process")
p_CC <- barplot(ego_CC, showCategory = 10) +
  ggtitle("barplot for Cellular component")
p_MF <- barplot(ego_MF, showCategory = 10) +
  ggtitle("barplot for Molecular function")

# Combine the three panels with the `/` operator and save them to one PDF.
plotc <- p_BP / p_CC / p_MF
ggsave(filename = 'enrichGO.pdf', plot = plotc, width = 12, height = 10)
- 使用ggplot繪圖(更靈活)
# Only the BP (biological process) terms are plotted here; the author finds
# them the most informative.
ego_bp <- ego_bp[order(ego_bp$p.adjust), ]
# head() instead of ego_bp[1:30, ]: with fewer than 30 enriched terms,
# [1:30, ] pads the table with all-NA rows, which show up as an "NA" bar.
ego_bp_top30 <- head(ego_bp, 30)

# Horizontal bar chart: one bar per GO term, bar length = number of genes.
ggplot(data = ego_bp_top30, aes(x = Description, y = Count)) +
  geom_bar(stat = "identity", width = 0.8, fill = "salmon1") +
  coord_flip() +
  xlab("GO term") +
  ylab("Num of Genes") +
  theme_bw()
之所以長短不齊、不按順序，是因為沒有排序。
# Sort by p-value, largest first.
# The ordering must be computed from ego_bp itself: indexing ego_bp with an
# ordering taken from ego_all (a different table with different rows)
# scrambles the rows and introduces NA rows.
ego_bp <- ego_bp[order(ego_bp$pvalue, decreasing = TRUE), ]
# Freeze the sorted order as factor levels; ggplot otherwise orders a
# character axis alphabetically. unique() guards against duplicated levels.
ego_bp$Description <- factor(ego_bp$Description, levels = unique(ego_bp$Description))
# Rebuild the plotted subset from the sorted table. After the descending
# sort, the 30 smallest p-values are the last 30 rows. Re-run the ggplot call
# on ego_bp_top30 to get the ordered chart.
ego_bp_top30 <- tail(ego_bp, 30)
排完序之後再畫圖，就是按 p 值順序排列的了。
4. KEGG富集分析
# Map the significant gene symbols to Entrez IDs and keep only the ID column
# for enrichKEGG().
id_map <- bitr(
  row.names(sig_dge.all),
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = 'org.Hs.eg.db'
)
genelist <- id_map[["ENTREZID"]]

# KEGG pathway enrichment for organism code 'hsa'.
ekegg <- enrichKEGG(gene = genelist, organism = 'hsa')

# Bar plot stacked above a dot plot of 20 categories each, saved as one PNG.
p1 <- barplot(ekegg, showCategory = 20)
p2 <- dotplot(ekegg, showCategory = 20)
plotc <- p1 / p2
ggsave(filename = "enrichKEGG.png", plot = plotc, width = 12, height = 10)