前言
在各種RNA測序數(shù)據(jù)中剪廉,我們幾乎都避不開差異表達基因(DEGs)的分析员魏。我們可以用氣泡圖疮蹦,提琴圖和熱圖等展示不同細胞亞群或樣本的差異基因表達情況,而火山圖則是最直觀的表現(xiàn)形式之一塔鳍,因此本文將介紹畫火山圖的作圖。
單組火山圖
這是比較常見的火山圖呻此,但只能展示兩組的差異基因轮纫。
函數(shù)示例:
deg_volcano_plot(all_markers,label='out',fc.filter=1.5,p.val=0.01, wt='avg_log2FC',gene.col='gene',lb.filter=2)
deg_volcano_plot函數(shù)的參數(shù):
all_markers 包含顯著性,差異倍數(shù)焚鲜,基因名的數(shù)據(jù)框掌唾,可由Seurat的FindAllmarkers或FindMarkers得到
label='out' ,輸出文件的前綴忿磅。
fc.filter=1.5 糯彬,展示的基因點FC閾值
p.val=0.01,顯著性閾值
wt='avg_log2FC' 葱她,差異倍數(shù)的列名
gene.col='gene'撩扒,基因名所在列名
lb.filter=2,標記基因名的FC閾值
library(dplyr)
library(ggplot2)
library(stringr)
library(data.table)
deg_volcano_plot <- function(all_markers,label='out',fc.filter=1.5,p.val=0.01, wt='avg_log2FC',gene.col='gene',lb.filter=2){
try({
a <- all_markers
a$gene <- a[,gene.col]
a$logfc <- a[,wt]
if (wt=='avg_log2FC'){
a$FC<-2^a$logfc
}else{
a$FC<-exp(a$logfc)
}
a$log2FC <- log2(a$FC)
a_h<-a[a$FC>=fc.filter & a$p_val_adj<=p.val,]
a_l<-a[a$FC<1/fc.filter & a$p_val_adj<p.val,]
gene_list <- c(a_h$gene, a_l$gene)
a_all<-rbind(a_h,a_l)
pos<- a$gene %in% a_all$gene
a_no<-a[!pos,]
try(a_h$type<-"Up")
try(a_l$type<-"Down")
try(a_no$type<-"None")
a_all<-rbind(a_h,a_no,a_l)
mat<-a_all
ran<-runif(nrow(a_all),min = 25,max =50)
ran<-100^-ran
mat$p_val_adj<-mat$p_val_adj+ran
cdn <- which(mat$gene %in% gene_list)
mat1 <- mat[cdn,]
pdf(paste0(label,"_vocalno_plot_",".pdf"),12,12)
p1<-ggplot(mat,aes(log2(FC),-1*log10(p_val_adj)))+
geom_point(aes(color =type))+
geom_text_repel(data=subset(mat1, abs(mat1$log2FC) >=log2(lb.filter)),aes(x=log2(FC),y=-log10(p_val_adj),label=gene),size=5,point.padding = 0.5)+
scale_color_manual(values=c('#377EB8',"lightgrey",'#E41A1C'))+
theme(panel.background =element_blank(),axis.line=element_line(colour="black"))
p1<-p1+theme(legend.title=element_blank(),legend.background = element_blank(),legend.key = element_blank(), legend.text = element_text(size=20), legend.position="NA")
p1<-p1+theme_bw()+theme(axis.text = element_text(size = 20))
p1<-p1+geom_vline(xintercept=c(-1,1),lty=3,col="black",lwd=0.8)+geom_hline(yintercept = -log10(0.01),lty=3,col="black",lwd=0.8)
p1<-p1+ggtitle(label)+theme(plot.title = element_text(hjust = 0.5,size=30,face="bold"))+theme(axis.title =element_text(size=20))
print(p1)
dev.off()
})
}
多組火山圖
相比于前者可以展示多組的差異基因吨些,plot_mutideg函數(shù)理論上也能畫單組火山圖搓谆。
函數(shù)使用示例:
plot_mutideg(df,incluster='cluster',infc='avg_log2FC',ingene='gene',pos=TRUE,prefix='out',mycol <- c("#E64B357F","#4DBBD57F","#00A0877F","#3C54887F","#F39B7F7F","#8491B47F","#91D1C27F","#DC00007F","#7E61487F"))
plot_mutideg函數(shù)的參數(shù):
df 包含顯著性,差異倍數(shù)豪墅,基因名的數(shù)據(jù)框泉手,可由Seurat的FindAllmarkers得到
incluster,默認為cluster偶器,分組的列名
infc斩萌,默認為avg_log2FC啡氢,差異倍數(shù)所在列名
ingene,默認為gene术裸,基因名所在列名倘是,
pos,默認為TRUE袭艺,是否只展示上調基因搀崭,
prefix='out',輸出文件的前綴
mycol猾编,默認為NULL瘤睹,調色板,顏色數(shù)目需多于分組的數(shù)目答倡。
library(ggplot2)
library(tidyverse)
library(ggrepel)
plot_mutideg <- function(df,incluster='cluster',infc='avg_log2FC',ingene='gene',pos=TRUE,prefix='out',
mycol=NULL){
df$cluster=unlist(df[incluster])
df$avg_log2FC=unlist(df[infc])
df$gene=unlist(df[ingene])
df$label <- ifelse(df$p_val_adj<0.01,"adjust P-val<0.01","adjust P-val>=0.01")
top10sig <- df %>% group_by(cluster) %>% top_n(10,wt=avg_log2FC)
df$size <- case_when(!(df$gene %in% top10sig$gene)~ 1,
df$gene %in% top10sig$gene ~ 2)
dt <- filter(df,size==1)
b1 <- top10sig %>% group_by(cluster) %>% summarize(bar = max(avg_log2FC,na.rm = T))
b2 <- top10sig %>% group_by(cluster) %>% summarize(bar = min(avg_log2FC,na.rm = T))
lsed= c(1:length(unique(df$cluster)))-1
dfbar<-data.frame(x=lsed,
y=b1$bar)
dfbar1<-data.frame(x=lsed,
y=b2$bar)
iny=0
if (pos){
p1 <- ggplot()+
geom_col(data = dfbar,
mapping = aes(x = x,y = y),
fill = "#dcdcdc",alpha = 0.6)
iny=-0.5
}else{
p1 <- ggplot()+
geom_col(data = dfbar,
mapping = aes(x = x,y = y),
fill = "#dcdcdc",alpha = 0.6)+
geom_col(data = dfbar1,
mapping = aes(x = x,y = y),
fill = "#dcdcdc",alpha = 0.6)
}
p2 <- p1+
geom_jitter(data = dt,
aes(x = cluster, y = avg_log2FC, color = label),
size = 0.85,
width =0.4)+
geom_jitter(data = data.frame(top10sig),
aes(x = cluster, y = avg_log2FC, color = label),
size = 1,
width =0.4,
shape=17)
dfcol<-data.frame(x=lsed,
y=iny,
label=unique(df$cluster))
p3 <- p2 + geom_tile(data = dfcol,
aes(x=x,y=y),
height=0.4,
color = "black",
fill = mycol,
alpha = 0.6,
show.legend = F)
p4 <- p3+
geom_text_repel(
data=top10sig,
aes(x=cluster,y=avg_log2FC, label=gene),
force = 1.2,
arrow = arrow(length = unit(0.008, "npc"),
type = "open", ends = "last")
)
p5 <- p4 +
scale_color_manual(name=NULL,
values = c("red","black"))
p6 <- p5+
labs(x="Cluster",y="average logFC")+
geom_text(data=dfcol,
aes(x=x,y=y,label=label),
size =6,
color ="white")
p7 <- p6+
theme_minimal()+
theme(
axis.title = element_text(size = 13,
color = "black",
face = "bold"),
axis.line.y = element_line(color = "black",
size = 1.2),
axis.line.x = element_blank(),
axis.text.x = element_blank(),
panel.grid = element_blank(),
legend.position = "top",
legend.direction = "vertical",
legend.justification = c(1,0),
legend.text = element_text(size = 15)
)
pdf(paste(prefix,'_multideg.pdf'),18,10)
print(p7)
dev.off()
}