lesson6 富集分析

1.AUCell package

假設存在一通路A有20個相關基因功舀，要評價通路的表達情況纽甘，可以用AUCell對這20個相關基因進行打分鱼鼓，得分高代表通路表達較高钉跷。

①加載package與數(shù)據(jù)

##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())


setwd("C:/Users/86269/Desktop/shun.C/single_cell")
###加載所需要的包
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
library(harmony)
library(cowplot)
library(ggplot2)
load("scRNA1.rdata")
library(AUCell)
library(ggplot2)
library(Seurat)
library(clusterProfiler)

②對細胞進行排序

cells_rankings <- AUCell_buildRankings(scRNA1@assays$RNA@data,nCores=1, plotStats=TRUE)

cell_rankings

這一步是對每單個細胞中每個基因的表達量進行排序，每個細胞的每個基因都得到一個排序序號

③下載用于參考的細胞通路對應的基因

在GSEA broad上進行下載

GSEA broad

通路數(shù)據(jù)庫簡介
1.GO（gene ontology疆前，即基因本體論）：基因本體論是對基因在不同維度和不同層次上的描述
a. Cellular Component(CC):解釋基因在細胞中的位置
b. Biological Process(BP):說明該基因參與了哪些生物學過程
c. Molecular Function(MF):說明該基因在分子層面的功能
2.KEGG:對細胞所有pathway進行富集分析,KEGG數(shù)據(jù)庫共有186條pathway
二者的區(qū)別在于數(shù)據(jù)庫不同

下載KEGG pathway數(shù)據(jù)后進行讀取

c2 <- read.gmt("c2.cp.kegg.v7.5.1.symbols.gmt")

提取每個通路對應的基因

geneSets <- lapply(unique(c2$term), function(x){print(x);c2$gene[c2$term == x]})
names(geneSets) <- unique(c2$term)

geneSets

④基因轉換

cell_rankings 是鼠的基因寒跳，而geneSets是人的pathway數(shù)據(jù)，需要用msigdbr包將geneSets轉換成對應的鼠的通路數(shù)據(jù)

library(msigdbr)
m_df<- msigdbr(species = "Mus musculus",  category = "C2", subcategory = "KEGG")
fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

fgsea_sets

⑤基因富集計算

cells_AUC <- AUCell_calcAUC(fgsea_sets, cells_rankings,nCores =1, aucMaxRank=nrow(cells_rankings)*0.1)
#提取氧化磷酸化通路相關的基因
geneSet <- "KEGG_OXIDATIVE_PHOSPHORYLATION"
aucs <- as.numeric(getAUC(cells_AUC)[geneSet, ])
#合并metadata信息與降維結果
df<- data.frame(scRNA1@meta.data, scRNA1@reductions$umap@cell.embeddings)
#將seurat cluster信息注釋到圖上
class_avg <- df %>%
  group_by(seurat_clusters) %>%
  summarise(
    UMAP_1 = median(UMAP_1),
    UMAP_2 = median(UMAP_2)
  )

⑥可視化

p <-ggplot(df, aes(UMAP_1, UMAP_2))  +
  geom_point(aes(colour  = AUC)) + viridis::scale_color_viridis(option="E") +
  ggrepel::geom_label_repel(aes(label = seurat_clusters),
                            data = class_avg,
                            size = 5,
                            label.size = 1,
                            segment.color = NA
  )+   theme(legend.position = "none") + theme_bw()

氧化磷酸化通路活性

分組展示

p<- p+facet_grid(.~orig.ident)

2.過表征分析

①基于GO數(shù)據(jù)庫的分析

接下來將對DepiNeg1峡继、DepiNeg2兩個樣本的差異基因進行分析冯袍，探究它們的差異基因主要與哪些功能有關
找到差異基因


degdf <- FindMarkers(scRNA1,ident.1 = "DapiNeg1",ident.2 = "DapiNeg2", 
                     logfc.threshold = 0.3,group.by = "orig.ident",subset.ident=1)

分析BP類通路

library(org.Mm.eg.db)
degs.list=rownames(degdf)
erich.go.BP = enrichGO(gene =degs.list,
                       OrgDb = org.Mm.eg.db,
                       keyType = "SYMBOL",
                       ont = "BP",
                       pvalueCutoff = 0.05,
                       qvalueCutoff = 0.05)
dotplot(erich.go.BP,showCategory = 15 )
barplot(erich.go.BP,showCategory = 8)
erich.go.BP=erich.go.BP@result

BP dotplot

BP barplot

分析CC類通路

erich.go.CC = enrichGO(gene =degs.list,
                       OrgDb = org.Mm.eg.db,
                       keyType = "SYMBOL",
                       ont = "CC",
                       pvalueCutoff = 0.5,
                       qvalueCutoff = 0.5)
dotplot(erich.go.CC,showCategory = 8)
barplot(erich.go.CC,showCategory = 8)

CC dotplot

CC barplot

分析MF類通路

erich.go.MF = enrichGO(gene =degs.list,
                       OrgDb = org.Mm.eg.db,
                       keyType = "SYMBOL",
                       ont = "MF",
                       pvalueCutoff = 0.5,
                       qvalueCutoff = 0.5)
dotplot(erich.go.MF,showCategory = 8)
barplot(erich.go.MF,showCategory = 8)

MF dotplot

MF barplot

②基于KEGG數(shù)據(jù)庫分析

轉換基因名
將差異基因的基因名從symble名轉化為entrezID

DEG.entrez_id = mapIds(x = org.Mm.eg.db,
                       keys =  degs.list,
                       keytype = "SYMBOL",
                       column = "ENTREZID")

進行基于KEGG的富集

enrich.kegg.res <- enrichKEGG(gene = DEG.entrez_id,
                             organism = "mmu",
                             keyType = "kegg")
dotplot(enrich.kegg.res)

如果出現(xiàn)報錯則運行以下代碼

library(R.utils)
R.utils::setOption('clusterProfiler.download.method','auto')

KEGG富集 dotplot

將結果進行保存

result <- enrich.kegg.res@result
write.table(result,"enrich.kegg.res.txt",sep = "\t",col.names = NA)

enrich.kegg.res

挑選其中感興趣的通路進行作圖
以第12-16，21-23碾牌，27-29個通路為例，將這些通路制成一個txt文件
儡循，并讀取該文件進行作圖

kegg=read.table("KEGG.8.20.txt",header = T,sep = "\t")

kegg

可視化

k=data.frame(kegg)
library(ggplot2)
library(dplyr)
#提取GeneRatio的前與后的值
before <- as.numeric(sub("/\\d+$", "", k$GeneRatio))
after <- as.numeric(sub("^\\d+/", "", k$GeneRatio))
k$GeneRatio = before /after
font.size =10
k %>% 
  ## 按p值排序
  arrange(p.adjust) %>% 
  ## 開始ggplot2 作圖舶吗，其中fct_reorder調(diào)整因子level的順序
  ggplot(aes(GeneRatio,forcats::fct_reorder(Description,Count)))+ 
  ## 畫出點圖
  geom_point(aes(color=p.adjust, size = Count)) +
  ## 調(diào)整顏色，guide_colorbar調(diào)整色圖的方向
  scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=TRUE))+
  ## 調(diào)整泡泡的大小
  scale_size_continuous(range=c(3, 8))+
  ## 如果用ylab("")或出現(xiàn)左側空白
  labs(y=NULL) +
  ## 如果沒有這一句择膝，上方會到頂
  ggtitle("")+
  ## 設定主題
  theme_bw() +
  theme(axis.text.x = element_text(colour = "black",
                                   size = font.size, vjust =1 ),
        axis.text.y = element_text(colour = "black",
                                   size = font.size, hjust =1 ),
        axis.title = element_text(margin=margin(10, 5, 0, 0),
                                  color = "black",size = font.size),
        axis.title.y = element_text(angle=90))

③可視化展示富集結果

讀取數(shù)據(jù)誓琼，完成降維聚類、注釋

##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())

setwd("C:/Users/86269/Desktop/shun.C/single_cell")
load("scRNA_harmony.Rdata")
library(dplyr)
library(Seurat)
library(tidyverse)
library(patchwork)
library(SingleR)
load("ref_Human_all.RData")


refdata <- ref_Human_all

testdata <- GetAssayData(scRNA_harmony, slot="data")
clusters <- scRNA_harmony@meta.data$seurat_clusters
###開始用singler分析
cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.main, 
                    method = "cluster", clusters = clusters, 
                    assay.type.test = "logcounts", assay.type.ref = "logcounts")

celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = FALSE)
scRNA_harmony@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  scRNA_harmony@meta.data[which(scRNA_harmony@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}

以cluster0為例，找到cluster0的marker基因并進行GO富集

#對cluster0尋找marker基因
deg =FindMarkers(scRNA_harmony,ident.1 = "sample_11",ident.2 = "sample_3",group.by = "orig.ident",
                 subset.ident = "0")
degs.list=rownames(deg)
#GO BP富集
library(org.Hs.eg.db)
library(clusterProfiler)
erich.go.BP = enrichGO(gene =degs.list,
                       OrgDb = org.Hs.eg.db,
                       keyType = "SYMBOL",
                       ont = "BP",
                       pvalueCutoff = 0.5,
                       qvalueCutoff = 0.5)

dotplot(erich.go.BP,showCategory = 30)

富集結果dotplot

挑選感興趣的通路進行作圖

erich.go.BP=erich.go.BP@result

enrich.go.BP

挑選3個感興趣的通路信息制成txt文件

goplot.txt

#提取用于作圖的信息
library(GOplot)
go1= read.table("goplot.txt",sep = "\t",header = T)
go1=erich.go.BP[1:3,c(1,2,8,6)]
rownames(go1)=go1$ID
go=go1

#調(diào)整go數(shù)據(jù)框
library(stringr)
go$geneID=str_replace_all(go$geneID,"/",",")
names(go)=c('ID','Term','Genes','adj_pval')
go$Category="BP"

#提取每個通路對應的基因
x1=strsplit(go$Genes[1],split=",",fixed=T)
x2=strsplit(go$Genes[2],split=",",fixed=T)
x3=strsplit(go$Genes[3],split=",",fixed=T)
g1=c(x1[[1]],x2[[1]],x3[[1]])
#從差異分析結果deg中提取這些基因的信息并整合
genedata1=deg[g1,]   
genedata1$ID=rownames(genedata1)
genedata2=data.frame(ID=genedata1$ID,logFC=genedata1$avg_log2FC)

genedata1

genedata2

各種形式可視化

circ <- circle_dat(go,genedata2)
##條形圖
GOBar(subset(circ, category == 'BP'))
#氣泡圖
GOBubble(circ, labels = 3)
#圈圖
GOCircle(circ, nsub = 3)

Bar Plot

bubble plot

circ plot

#和弦圖
chord<-chord_dat(circ, genedata2)
GOChord(chord, gene.order = 'logFC')
#基因與GO Term的熱圖
GOHeat(chord, nlfc = 1, fill.col = c('red', 'yellow', 'green'))

chord plot

heatmap

3.GSVA富集

GSVA富集不僅可以得出差異基因富集于哪個通路腹侣，還能分析出該通路相關的基因綜合表達是上調(diào)還是下調(diào)叔收。GSVA先將基因富集到各通路上，再通過差異分析找出差異表達的通路傲隶。

GSVA原理

①pipeline

輸入表達矩陣饺律，得到GSVA評分矩陣
準備分析數(shù)據(jù)與package

library(GSEABase)
library(GSVA)
library(msigdbr)

###準備參考數(shù)據(jù)集###
m_df<- msigdbr(species = "human",  category = "H" )
#將gene_symbol按gs_name分組
geneSets <- m_df %>% split(x = .$gene_symbol, f = .$gs_name)
###準備分析數(shù)據(jù)集###
exp=AverageExpression(scRNA_harmony,add.ident = "orig.ident") 
exp=exp[["RNA"]]
#挑選sample11與sample3的cluster0、1跺株、2進行分析
counts2=exp[,c(1:6)]

geneSets

counts2

進行GSVA富集

GSVA_hall <- gsva(expr=as.matrix(counts2), 
                  gset.idx.list=geneSets, 
                  mx.diff=T, # 數(shù)據(jù)為正態(tài)分布則T复濒，雙峰則F
                  kcdf="Poisson", #CPM, RPKM, TPM數(shù)據(jù)就用默認值"Gaussian"， read count數(shù)據(jù)則為"Poisson"
                   )

GSVA_hall

limma進行差異通路分析

# 設置分組矩陣
group <- factor(c(rep("s.11",3), rep("s.3", 3)), levels = c( 's.11','s.3'))
design <- model.matrix(~0+group)
colnames(design) = levels(factor(group))
rownames(design) = colnames(GSVA_hall)

design

###進行差異分析###
#創(chuàng)建比較對象乒省，此處是s.3與s.11比較
compare <- makeContrasts(s.3 - s.11, levels=design)
#線性擬合
fit <- lmFit(GSVA_hall, design)
fit2 <- contrasts.fit(fit, compare)
#貝葉斯
fit3 <- eBayes(fit2)
#提取差異分析結果
Diff <- topTable(fit3, coef=1, number=200)

Diff

可視化

dat_plot <- data.frame(id = row.names(Diff),
                       t = Diff$t)
# 去掉"HALLMARK_"
library(stringr)
dat_plot$id <- str_replace(dat_plot$id , "HALLMARK_","")
# 變成因子類型
dat_plot$id <- factor(dat_plot$id,levels = dat_plot$id)
# 新增一列 根據(jù)t閾值分類
dat_plot$threshold = factor(ifelse(dat_plot$t  >-1, ifelse(dat_plot$t >= 1 ,'Up','NoSignifi'),'Down'),levels=c('Up','Down','NoSignifi'))
# 按t值排序
dat_plot <- dat_plot %>% arrange(t)

dat_plot

library(ggplot2)
library(ggthemes)
library(ggprism)
p <- ggplot(data = dat_plot,aes(x = id,y = t,fill = threshold)) +
  geom_col()+
  coord_flip() +#橫縱坐標對調(diào)
  scale_fill_manual(values = c('Up'= '#36638a','NoSignifi'='#cccccc','Down'='#7bcd7b')) +
  geom_hline(yintercept = c(-2,2),color = 'white',size = 0.5,lty='dashed') +
  xlab('') + 
  ylab('t value of GSVA score, S.11 VS S.3') + 
  guides(fill=F)+ # 不顯示圖例
  theme_prism(border = T) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
###添加標簽###
# 小于-1的數(shù)量
low1 <- dat_plot %>% filter(t < -1) %>% nrow()
# 小于0總數(shù)量
low0 <- dat_plot %>% filter( t < 0) %>% nrow()
# 小于1總數(shù)量
high0 <- dat_plot %>% filter(t < 1) %>% nrow()
# 總的柱子數(shù)量
high1 <- nrow(dat_plot)

# 依次從下到上添加標簽
p <- p + geom_text(data = dat_plot[1:low1,],aes(x = id,y = 0.1,label = id),
                   hjust = 0,color = 'black',size=2) + # 小于-1的為黑色標簽
  geom_text(data = dat_plot[(low1 +1):low0,],aes(x = id,y = 0.1,label = id),
            hjust = 0,color = 'grey',size=2) + # 灰色標簽
  geom_text(data = dat_plot[(low0 + 1):high0,],aes(x = id,y = -0.1,label = id),
            hjust = 1,color = 'grey',size=2) + # 灰色標簽
  geom_text(data = dat_plot[(high0 +1):high1,],aes(x = id,y = -0.1,label = id),
            hjust = 1,color = 'black',size=2) # 大于1的為黑色標簽
p

pheatmap::pheatmap(GSVA_hall, #熱圖的數(shù)據(jù)
                   cluster_rows = T,#行聚類
                   cluster_cols =T,#列聚類巧颈，可以看出樣本之間的區(qū)分度
                   fontsize = 5,
                   show_colnames=T,
                   scale = "row", #以行來標準化
                   color =colorRampPalette(c("#FF7744", "white","#AAAAAA","#0044BB"))(100))

heatmap

4.自定義通路

某些情況下直接用GO、KEGG袖扛、hallmark等數(shù)據(jù)庫已有的基因集進行富集分析得不到理想結果砸泛，就可以自定義通路與通路對應的基因進行GSVA分析（通過文獻或已有資料刪除對通路起負反饋效果的基因等）。

①GSVA自定義分析

準備自定義的通路數(shù)據(jù)集
需要準備1.基因名稱的txt文件,2.通路名稱文件
先在excel表中進行操作蛆封，再復制粘貼到txt中
在excel表中準備數(shù)據(jù)時需要多加一個自定義的xyz通路晾嘶，并保證該通路基因數(shù)最多，因為在后續(xù)去掉表中的空值時最長的一列（即沒有空值的一列）會被全部去除

#讀取文件
gene.set=read.table("gene.txt",
                    header =F,sep = '\t',quote = '',fill=TRUE)
kegg.123=read.table("genesetname.txt",
                    header =F,sep = '\t',quote = '')
#將行轉為基因娶吞，列轉為通路
gene.set1=as.matrix(gene.set)
gene.set2=t(gene.set1)
#去掉空值
gmt=list()
for (i in 1:11) {
  y=as.character(gene.set2[,i]) 
  b=which(y=="")
  gmt[[i]]=y[-b]
}
#對通路命名
names(gmt)=kegg.123$V1
#去掉xyz通路
gmt=gmt[-1]

gmt

②GO富集自定義分析

找出差異基因

##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())

setwd("C:/Users/86269/Desktop/shun.C/single_cell")
load("scRNA_harmony.Rdata")
library(dplyr)
library(Seurat)
library(tidyverse)
library(patchwork)
library(SingleR)
load("ref_Human_all.RData")


refdata <- ref_Human_all

testdata <- GetAssayData(scRNA_harmony, slot="data")
###把scRNA數(shù)據(jù)中的seurat_clusters提取出來垒迂，注意這里是因子類型的
clusters <- scRNA_harmony@meta.data$seurat_clusters
###開始用singler分析
cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.main, 
                    method = "cluster", clusters = clusters, 
                    assay.type.test = "logcounts", assay.type.ref = "logcounts")

celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = FALSE)
scRNA_harmony@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  scRNA_harmony@meta.data[which(scRNA_harmony@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}

sc.t=scRNA_harmony[,rownames(subset(scRNA_harmony@meta.data,celltype=="T_cells"))]  
deg =FindMarkers(scRNA_harmony,ident.1 = "sample_11",ident.2 = "sample_3",group.by = "orig.ident",
                 subset.ident = "0")

進行自定義的GO分析
用enricher()進行自定義分析，enrichGO()的分析數(shù)據(jù)都來源于網(wǎng)絡上的數(shù)據(jù)庫妒蛇，無法修改机断，但enricher()依靠自定義的通路數(shù)據(jù)，方便修改绣夺。
enricher的通路數(shù)據(jù)TERM2GENE是包含兩列的數(shù)據(jù)框吏奸，一列為通路，一列為基因

#準備通路數(shù)據(jù)
m_df <-msigdbr(species = "human",category = "H")
m_df <- m_df[,c(3,4)]
#進行富集分析
res<-enricher(gene = rownames(deg),TERM2GENE = m_df)

5.代謝活性通路富集

僅關注與代謝有關的通路

##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())

setwd("C:/Users/86269/Desktop/shun.C/single_cell")

install.packages("wesanderson")
install.packages("C:/Users/86269/Desktop/shun.C/single_cell/phangorn_2.8.1.zip", repos = NULL, type = "win.binary")
install.packages("C:/Users/86269/Desktop/shun.C/single_cell/quadprog_1.5-8.zip", repos = NULL, type = "win.binary")
devtools::install_github("YosefLab/VISION")
devtools::install_github("wu-yc/scMetabolism")


library(scMetabolism)
library(ggplot2)
library(rsvd)

load(file = "pbmc_demo.rda")

countexp.Seurat<-sc.metabolism.Seurat(obj = countexp.Seurat, method = "VISION", imputation = F, ncores = 2, metabolism.type = "KEGG")



DimPlot.metabolism(obj = countexp.Seurat, pathway = "Glycolysis / Gluconeogenesis", dimention.reduction.type = "umap", dimention.reduction.run = F, size = 1)

##Dot plot
input.pathway<-c("Glycolysis / Gluconeogenesis", "Oxidative phosphorylation", "Citrate cycle (TCA cycle)")
DotPlot.metabolism(obj = countexp.Seurat, pathway = input.pathway, phenotype = "ident", norm = "y")



## Box plot
BoxPlot.metabolism(obj = countexp.Seurat, pathway = input.pathway, phenotype = "ident", ncol = 1)

metabolism.matrix<-sc.metabolism(countexp = countexp, method = "VISION", imputation = F, ncores = 2, metabolism.type = "KEGG")

dimplot

dotplot

boxplot

6.GSEA富集分析

傳統(tǒng)的KEGG或GO富集分析的缺點是規(guī)定閾值進行篩選陶耍，但一條通路上不同的基因表達量可能會有很大差異胶背，上游基因的微小變化可能造成下游基因明顯變化，因此規(guī)定閾值進行篩選會損失很多基因信息歹茶。而GSEA方法不需要規(guī)定閾值進行篩選捎拯，規(guī)避了傳統(tǒng)富集方法的缺點。

①Pipeline

##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())

setwd("C:/Users/86269/Desktop/shun.C/single_cell")
###加載所需要的包
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
library(harmony)
library(cowplot)
library(ggplot2)
load("scRNA1.rdata")
#按細胞類型分組毯欣，尋找差異基因
Idents(scRNA1)="celltype"
deg=FindMarkers(scRNA1,ident.1 = "Adipocytes",ident.2 = "Granulocytes",
                min.pct = 0.01,logfc.threshold = 0.01)
#將avg_log2FC按從大到小提嚷（GSEA函數(shù)的要求）
genelist=deg$avg_log2FC
#把鼠的基因名改為人的基因名，二者的差別就在于大小寫不同
names(genelist)=toupper(rownames(deg))
genelist=sort(genelist,decreasing = T)
###進行富集分析與可視化###
library(clusterProfiler)
library(org.Hs.eg.db)
library(GSEABase)
library(enrichplot)
#讀取通路信息
geneset=read.gmt("c2.cp.kegg.v7.5.1.symbols.gmt")
#進行GSEA富集
egmt=GSEA(genelist,TERM2GENE = geneset,
          minGSSize = 1,pvalueCutoff = 0.5)#TERM2GENE可以自行篩選
#查看前兩條通路的富集情況
gseaplot2(egmt,geneSetID =c(1,2),pvalue_table = T,base_size = 5)

富集結果

第一部分為基因Enrichment Score的折線圖酗钞，大概意思是對差異基因排序列表進行掃描腹忽，每遇到一個參與該通路的差異基因来累，得分就會上升，如果遇到一個差異基因不在該通路里窘奏，就罰分嘹锁；score縱軸為對應的Running ES, 在折線圖中有個峰值，該峰值就是這個基因集的Enrichemnt score着裹，峰值之前的基因就是該基因集下的核心基因领猾，也稱為領頭亞集基因，表明對該條通路最終得分貢獻最大的基因求冷。
第二部分用線條標記位于該基因集下的基因瘤运，從左到右按照log2FC從高到低進行排序。
第三部分的縱坐標代表FoldChange值匠题。
從圖中可看出一個通路的基因富集在FoldChange>0的區(qū)域拯坟，說明該通路上調(diào)，另一個則富集在FoldChange<0的區(qū)域韭山，說明該通路下調(diào)郁季。
將上調(diào)與下調(diào)的通路分別進行批量可視化并儲存圖片

kegg.res=egmt@result

down.kegg.res<-kegg.res[(kegg.res$p_adjust<0.01 & kegg.res$enrichmentScore < -1.5),]
down.kegg.res$group=1

up.kegg.res<-kegg.res[(kegg.res$p_adjust<0.01 & kegg.res$enrichmentScore > 1.5),]
up.kegg.res$group=1

lapply(1:nrow(down.kegg.res), function(i){
  gseaplot2(egmt,down.kegg.res$ID[i],title = down.kegg.res$Description[i],pvalue_table = T)
  ggsave(paste0(gsub("/","_",down.kegg.res$Description[i]),".down.pdf"),width = 11,height =5)
}
)

down.kegg.res$ID[1]
lapply(1:nrow(up.kegg.res), function(i){
  gseaplot2(egmt,up.kegg.res$ID[i],title = up.kegg.res$Description[i],pvalue_table = T)
  ggsave(paste0(gsub("/","-",up.kegg.res$Description[i]),".up.pdf"),width = 11,height =5)
}
)

②自定義通路

fgsea能夠快速對預選基因集進行GSEA富集分析，預選基因集可以是自己設定钱磅，一般使用MSigdb數(shù)據(jù)庫（同樣由提出GSEA方法團隊提供）梦裂。

library(msigdbr)
library(fgsea)
#提取預選基因集
m_df<- msigdbr(species = "Mus musculus",  category = "C2", subcategory = "KEGG")
fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

#準備用于富集分析的基因數(shù)據(jù)
genelist=deg$avg_log2FC
names(genelist)= rownames(deg)
genelist=sort(genelist,decreasing = T)

#進行富集
fgseaRes<- fgsea(fgsea_sets, stats = genelist )

#進行可視化,篩選p值<0.005的20個通路，NES<1.5的視為下調(diào)基因盖淡，反之則是上調(diào)
ggplot(fgseaRes %>% filter(padj < 0.005) %>% head(n= 20), aes(reorder(pathway, NES), NES)) +
  geom_col(aes(fill= NES < 1.5)) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title="Hallmark pathways NES from GSEA") +
  theme_minimal() ####以1.5進行繪圖填色

fgseaRes

③自定義畫圖

加載數(shù)據(jù)年柠，尋找差異基因

##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())

setwd("C:/Users/86269/Desktop/shun.C/single_cell")
###加載所需要的包
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
library(harmony)
library(cowplot)
library(ggplot2)
library(clusterProfiler)
library(enrichplot)
library(plyr)
library(ggrepel)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
load("scRNA1.rdata")

Idents(scRNA1)="celltype"

deg=FindMarkers(scRNA1,ident.1 = "Adipocytes",ident.2 = "Granulocytes",
                min.pct = 0.01,logfc.threshold = 0.01)

進行富集分析

#按照FoldChange排序
gsym.fc.id.sorted <- deg[order(deg$avg_log2FC,decreasing = T),]

#將基因ID從symbol轉化為entrez
expm.id <- bitr(rownames(gsym.fc.id.sorted), fromType = "SYMBOL", toType = "ENTREZID", OrgDb = "org.Mm.eg.db")
exp.fc.id <- merge(expm.id, gsym.fc.id.sorted,by.x=1, by.y=0 )

#按FoldChange排序
gsym.fc.id.sorted <- exp.fc.id[order(exp.fc.id$avg_log2FC,decreasing = T),]
#獲得ENTREZID、foldchange列表褪迟，做為GSEA的輸入
id.fc <- gsym.fc.id.sorted$avg_log2FC
names(id.fc) <- gsym.fc.id.sorted$ENTREZID

#進行gseKEGG富集分析
library(R.utils)
R.utils::setOption("clusterProfiler.download.method","auto")
#這一條語句就做完了KEGG的GSEA分析
kk <- gseKEGG(id.fc, organism = "mmu")
#選擇兩條通路作圖
gseaplot2(kk, geneSetID = c(1,2), 
          color = c("darkgreen","chocolate4")) #自定義顏色

自定義作圖
作圖前數(shù)據(jù)準備

#把ENTREZ ID轉為gene symbol冗恨，便于查看通路里的基因
kk.gsym <- setReadable(kk, 'org.Mm.eg.db', #物種
                       'ENTREZID')
#按照enrichment score從高到低排序，便于查看富集的通路
sortkk <- kk.gsym[order(kk.gsym$enrichmentScore, decreasing = T),]
# 要畫的通路 
geneSetID <- c("mmu03010", "mmu00982")
# 突出顯示感興趣的基因
selectedGeneID <- c("Rps28","Rps29","Rps26","Rps2","Rpl22l1")



# 自定義足夠多的顏色
mycol <- c("darkgreen","chocolate4","blueviolet","#223D6C","#D20A13","#088247","#58CDD9","#7A142C","#5D90BA","#431A3D","#91612D","#6E568C","#E0367A","#D8D155","#64495D","#7CC767")
# 字號
base_size = 12
# rank mode
rankmode <- "comb" #合起來畫
#rankmode <- "sep" #如果通路多味赃，分開畫更好

#合并多條通路的數(shù)據(jù)
gsdata <- do.call(rbind, lapply(geneSetID,enrichplot:::gsInfo, object = x))#對兩條通路分別用gsInfo提取富集結果
gsdata$gsym <- rep(gsym.fc.id.sorted$SYMBOL,2)

分別畫出GSEA富集圖的三個部分

# 畫running score
p.res <- ggplot(gsdata, aes_(x = ~x)) + xlab(NULL) +
  geom_line(aes_(y = ~runningScore, color= ~Description), size=1) +
  scale_color_manual(values = mycol) +
  
  scale_x_continuous(expand=c(0,0)) + #兩側不留空
  geom_hline(yintercept = 0, lty = 2, lwd = 0.2) + #在0的位置畫虛線
  ylab("Enrichment\n Score") +
  
  theme_bw(base_size) +
  theme(panel.grid = element_blank()) + #不畫網(wǎng)格
  
  theme(legend.position = "top", legend.title = element_blank(),
        legend.background = element_rect(fill = "transparent")) +
  
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.line.x=element_blank(),
        plot.margin=margin(t=.2, r = .2, b=0, l=.2, unit="cm"))

running score

# 畫rank
if (rankmode == "comb") {
  rel_heights <- c(1.5, .2, 1.5) # 整個GSEA圖上中下三個部分的比例
  
  p2 <- ggplot(gsdata, aes_(x = ~x)) +
    geom_bar(position = "dodge",stat = "identity",
             aes(x, y = position*(-0.1), fill=Description))+#圖中所有豎線即y值相等掀抹，都是0
    xlab(NULL) + ylab(NULL) + 
    scale_fill_manual(values = mycol) + #用自定義的顏色
    
    theme_bw(base_size) +
    theme(panel.grid = element_blank()) + #不畫網(wǎng)格
    
    theme(legend.position = "none",
          plot.margin = margin(t=-.1, b=0,unit="cm"),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          axis.line.x = element_blank()) +
    scale_x_continuous(expand=c(0,0)) +
    scale_y_continuous(expand=c(0,0))
  
} else if (rankmode == "sep") {
  rel_heights <- c(1.5, .5, 1.5) # 上中下三個部分的比例
  
  i <- 0
  for (term in unique(gsdata$Description)) {
    idx <- which(gsdata$ymin != 0 & gsdata$Description == term)
    gsdata[idx, "ymin"] <- i
    gsdata[idx, "ymax"] <- i + 1
  }
  
  head(gsdata)
  
  p2 <- ggplot(gsdata, aes_(x = ~x)) +
    geom_linerange(aes_(ymin=~ymin, ymax=~ymax, color=~Description)) +
    xlab(NULL) + ylab(NULL) + 
    scale_color_manual(values = mycol) + #用自定義的顏色
    
    theme_bw(base_size) +
    theme(panel.grid = element_blank()) + #不畫網(wǎng)格
    
    theme(legend.position = "none",
          plot.margin = margin(t=-.1, b=0,unit="cm"),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          axis.line.x = element_blank()) +
    scale_x_continuous(expand=c(0,0)) +
    scale_y_continuous(expand=c(0,0))
} else {stop("Unsupport mode")}

rank

# 畫變化倍數(shù)
df2 <- p.res$data
df2$y <- p.res$data$geneList[df2$x]


# 提取感興趣的基因的變化倍數(shù)
selectgenes <- data.frame(gsym = selectedGeneID)
selectgenes <- merge(selectgenes, df2, by = "gsym")
selectgenes <- selectgenes[selectgenes$position == 1,]

p.pos <- ggplot(selectgenes, aes(x, y, fill = Description, color = Description, label = gsym)) + 
  geom_segment(data=df2, aes_(x=~x, xend=~x, y=~y, yend=0), 
               color = "grey") +
  geom_bar(position = "dodge", stat = "identity") +
  scale_fill_manual(values = mycol, guide=FALSE) + #用自定義的顏色
  scale_color_manual(values = mycol, guide=FALSE) + #用自定義的顏色
  
  scale_x_continuous(expand=c(0,0)) +
  geom_hline(yintercept = 0, lty = 2, lwd = 0.2) + #在0的位置畫虛線
  ylab("Ranked list\n metric") +
  xlab("Rank in ordered dataset") +
  
  theme_bw(base_size) +
  theme(panel.grid = element_blank()) +
  
  # 顯示感興趣的基因的基因名
  geom_text_repel(data = selectgenes, 
                  show.legend = FALSE, #不顯示圖例
                  direction = "x", #基因名橫向排列在x軸方向
                  ylim = c(2, NA), #基因名畫在-2下方
                  angle = 90, #基因名豎著寫
                  size = 2.5, box.padding = unit(0.35, "lines"), 
                  point.padding = unit(0.3, "lines")) +
  theme(plot.margin=margin(t = -.1, r = .2, b=.2, l=.2, unit="cm"))

p.pos

# 組圖
plotlist <- list(p.res, p2, p.pos)
n <- length(plotlist)
plotlist[[n]] <- plotlist[[n]] +
  theme(axis.line.x = element_line(),
        axis.ticks.x = element_line(),
        axis.text.x = element_text())

plot_grid(plotlist = plotlist, ncol = 1, align="v", rel_heights = rel_heights)

最后編輯于：2022.09.01 10:35:04

?著作權歸作者所有,轉載或內(nèi)容合作請聯(lián)系作者

人面猴
序言：七十年代末，一起剝皮案震驚了整個濱河市心俗，隨后出現(xiàn)的幾起案子傲武，更是在濱河造成了極大的恐慌，老刑警劉巖城榛，帶你破解...
沈念sama閱讀 206,013評論 6贊 481
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件揪利，死亡現(xiàn)場離奇詭異，居然都是意外死亡吠谢，警方通過查閱死者的電腦和手機土童，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 88,205評論 2贊 382
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進店門，熙熙樓的掌柜王于貴愁眉苦臉地迎上來工坊，“玉大人献汗，你說我怎么就攤上這事⊥跷郏” “怎么了罢吃？”我有些...
開封第一講書人閱讀 152,370評論 0贊 342
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵，是天一觀的道長昭齐。經(jīng)常有香客問我尿招，道長，這世上最難降的妖魔是什么阱驾？我笑而不...
開封第一講書人閱讀 55,168評論 1贊 278
?港島之戀（遺憾婚禮）
正文為了忘掉前任就谜，我火速辦了婚禮，結果婚禮上里覆，老公的妹妹穿的比我還像新娘丧荐。我一直安慰自己，他們只是感情好喧枷，可當我...
茶點故事閱讀 64,153評論 5贊 371
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布虹统。她就那樣靜靜地躺著，像睡著了一般隧甚。火紅的嫁衣襯著肌膚如雪车荔。梳的紋絲不亂的頭發(fā)上，一...
開封第一講書人閱讀 48,954評論 1贊 283
城市分裂傳說
那天戚扳，我揣著相機與錄音忧便，去河邊找鬼。笑死帽借，一個胖子當著我的面吹牛珠增，可吹牛的內(nèi)容都是我干的。我是一名探鬼主播宜雀，決...
沈念sama閱讀 38,271評論 3贊 399
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼切平，長吁一口氣：“原來是場噩夢啊……” “哼！你這毒婦竟也來了辐董？” 一聲冷哼從身側響起悴品，我...
開封第一講書人閱讀 36,916評論 0贊 259
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤，失蹤者是張志新（化名）和其女友劉穎简烘，沒想到半個月后苔严，有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體，經(jīng)...
沈念sama閱讀 43,382評論 1贊 300
?護林員之死
正文獨居荒郊野嶺守林人離奇死亡孤澎，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點故事閱讀 35,877評論 2贊 323
?白月光啟示錄
正文我和宋清朗相戀三年届氢，在試婚紗的時候發(fā)現(xiàn)自己被綠了。大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片覆旭。...
茶點故事閱讀 37,989評論 1贊 333
活死人
序言：一個原本活蹦亂跳的男人離奇死亡退子，死狀恐怖岖妄，靈堂內(nèi)的尸體忽然破棺而出，到底是詐尸還是另有隱情寂祥，我是刑警寧澤荐虐，帶...
沈念sama閱讀 33,624評論 4贊 322
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布，位于F島的核電站丸凭，受9級特大地震影響福扬，放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜惜犀，卻給世界環(huán)境...
茶點故事閱讀 39,209評論 3贊 307
男人毒藥：我在死后第九天來索命
文/蒙蒙一铛碑、第九天我趴在偏房一處隱蔽的房頂上張望。院中可真熱鬧虽界，春花似錦汽烦、人聲如沸。這莊子的主人今日做“春日...
開封第一講書人閱讀 30,199評論 0贊 19
一樁弒父案刹缝，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽。三九已至颈将，卻和暖如春梢夯，著一層夾襖步出監(jiān)牢的瞬間，已是汗流浹背晴圾。一陣腳步聲響...
開封第一講書人閱讀 31,418評論 1贊 260
情欲美人皮
我被黑心中介騙來泰國打工颂砸，沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留，地道東北人死姚。一個月前我還...
沈念sama閱讀 45,401評論 2贊 352
代替公主和親
正文我出身青樓人乓，卻偏偏與公主長得像，于是被迫代替她去往敵國和親都毒。傳聞我的和親對象是個殘疾皇子色罚，可洞房花燭夜當晚...
茶點故事閱讀 42,700評論 2贊 345