scRNA基礎(chǔ)分析-1:安裝包、導(dǎo)入數(shù)據(jù)、過(guò)濾質(zhì)控 - 簡(jiǎn)書(shū)
scRNA基礎(chǔ)分析-2:降維與聚類(lèi) - 簡(jiǎn)書(shū)
scRNA基礎(chǔ)分析-3:鑒定細(xì)胞類(lèi)型 - 簡(jiǎn)書(shū)
scRNA基礎(chǔ)分析-4:細(xì)胞亞類(lèi)再聚類(lèi)刨疼、注釋 - 簡(jiǎn)書(shū)
scRNA基礎(chǔ)分析-5:偽時(shí)間分析 - 簡(jiǎn)書(shū)
scRNA基礎(chǔ)分析-6:富集分析 - 簡(jiǎn)書(shū)
在之前的分析中,已經(jīng)將細(xì)胞分為多種不同的類(lèi)型间螟,例如cluster纤垂、cell type、stage等宛乃。以cluster為例悠咱,不同的cluster有哪些基因表達(dá)差異顯著,有何特別的意義征炼?富集分析可以幫助我們挖掘一些信息析既。
之前學(xué)習(xí)轉(zhuǎn)錄組時(shí),已了解有關(guān)富集分析的基本概念谆奥,詳見(jiàn)筆記眼坏。
library(Seurat)
library(tidyverse)
library(patchwork)
library(monocle)
library(clusterProfiler)
library(org.Hs.eg.db)
rm(list=ls())
scRNA <- readRDS("scRNA.rds")
mycds <- readRDS("mycds.rds")
1、差異分析
- 差異分析是富集分析的基礎(chǔ)酸些,基于前面的結(jié)果宰译,我們可以進(jìn)行多種差異分析檐蚜。
#比較cluster0和cluster1的差異表達(dá)基因
dge.cluster <- FindMarkers(scRNA,ident.1 = 0,ident.2 = 1)
sig_dge.cluster <- subset(dge.cluster, p_val_adj<0.01&abs(avg_logFC)>1)
#比較B_cell和T_cells的差異表達(dá)基因,后面的演示以此數(shù)據(jù)為例
dge.celltype <- FindMarkers(scRNA, ident.1 = 'B_cell', ident.2 = 'T_cells', group.by = 'celltype')
sig_dge.celltype <- subset(dge.celltype, p_val_adj<0.01&abs(avg_logFC)>1)
#比較擬時(shí)State1和State3的差異表達(dá)基因
p_data <- subset(pData(mycds),select='State')
scRNAsub <- subset(scRNA, cells=row.names(p_data))
scRNAsub <- AddMetaData(scRNAsub,p_data,col.name = 'State')
dge.State <- FindMarkers(scRNAsub, ident.1 = 1, ident.2 = 3, group.by = 'State')
sig_dge.State <- subset(dge.State, p_val_adj<0.01&abs(avg_logFC)>1)
2囤屹、go分析
ego_ALL <- enrichGO(gene = row.names(sig_dge.celltype),
#universe = row.names(dge.celltype),
OrgDb = 'org.Hs.eg.db',
keyType = 'SYMBOL',
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego_all <- data.frame(ego_ALL)
2-1
ego_CC <- enrichGO(gene = row.names(sig_dge.celltype),
#universe = row.names(dge.celltype),
OrgDb = 'org.Hs.eg.db',
keyType = 'SYMBOL',
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego_MF <- enrichGO(gene = row.names(sig_dge.celltype),
#universe = row.names(dge.celltype),
OrgDb = 'org.Hs.eg.db',
keyType = 'SYMBOL',
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
ego_BP <- enrichGO(gene = row.names(sig_dge.celltype),
#universe = row.names(dge.celltype),
OrgDb = 'org.Hs.eg.db',
keyType = 'SYMBOL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
#截取每個(gè)description的前70個(gè)字符熬甚,方便后面作圖排版
ego_CC@result$Description <- substring(ego_CC@result$Description,1,70)
ego_MF@result$Description <- substring(ego_MF@result$Description,1,70)
ego_BP@result$Description <- substring(ego_BP@result$Description,1,70)
p_BP <- barplot(ego_BP,showCategory = 10) + ggtitle("barplot for Biological process")
p_CC <- barplot(ego_CC,showCategory = 10) + ggtitle("barplot for Cellular component")
p_MF <- barplot(ego_MF,showCategory = 10) + ggtitle("barplot for Molecular function")
plotc <- p_BP/p_CC/p_MF
2-2
3、kegg分析
genelist <- bitr(row.names(sig_dge.celltype), fromType="SYMBOL",
toType="ENTREZID", OrgDb='org.Hs.eg.db')
# kegg分析的基因名必須要是ENTREZID
genelist <- pull(genelist,ENTREZID)
ekegg <- enrichKEGG(gene = genelist, organism = 'hsa')
p1 <- barplot(ekegg, showCategory=20)
p2 <- dotplot(ekegg, showCategory=20)
plotc = p1/p2
2-3