在上一篇中,我已經(jīng)講解了展示marker基因的前兩種圖形,分別是tsne/umap圖踢京、熱圖,感興趣的讀者可以回顧一下宦棺。這一節(jié)我們繼續(xù)學(xué)習(xí)堆疊小提琴圖和氣泡圖瓣距。
3. 堆疊小提琴圖展示marker基因
相比于其他可視化形式,小提琴圖可以更直觀地展示某一類亞群的某一個(gè)基因的表達(dá)分布情況代咸。我的marker基因一共選了12個(gè)蹈丸,下面來畫圖:
Seurat內(nèi)置的VlnPlot函數(shù)可以直接畫,
library(xlsx)
markerdf2=read.xlsx("ref_marker2.xlsx",sheetIndex = 1)
markerdf2$gene=as.character(markerdf2$gene)
mye.seu=readRDS("mye.seu.rds")
mye.seu$celltype=factor(mye.seu$celltype,levels = sort(unique(mye.seu$celltype)))
Idents(mye.seu)="celltype"
VlnPlot(mye.seu, features = markerdf2$gene, pt.size = 0, ncol = 1)+
scale_x_discrete("")+
theme(
axis.text.x.bottom = element_blank()
)
ggsave("vln1.pdf",width = 20,height = 80,units = "cm")
其中pt.size
參數(shù)表示點(diǎn)的大小呐芥,一個(gè)點(diǎn)就是一個(gè)細(xì)胞逻杖,一般可以直接設(shè)置為0,即不顯示點(diǎn)思瘟,只畫小提琴荸百,看上去更加清楚。盡管此處我對(duì)標(biāo)度和主題進(jìn)行了調(diào)整滨攻,但我發(fā)現(xiàn)這只對(duì)單個(gè)feature有用够话,多個(gè)feature時(shí)就不起作用了蓝翰,后續(xù)就用AI來簡(jiǎn)單編輯一下吧。
需要注意的是女嘲,圖的顏色是根據(jù)亞群的類別來劃分的畜份,并不是根據(jù)基因來區(qū)分。
第二種方法欣尼,ggplot2代碼如下
library(reshape2)
vln.df=as.data.frame(mye.seu[["RNA"]]@data[markerdf2$gene,])
vln.df$gene=rownames(vln.df)
vln.df=melt(vln.df,id="gene")
colnames(vln.df)[c(2,3)]=c("CB","exp")
#數(shù)據(jù)格式如下
# > head(vln.df)
# gene CB exp
# 1 CLEC9A N01_AAACGGGCATTTCAGG_1 0.000
# 2 RGCC N01_AAACGGGCATTTCAGG_1 0.000
# 3 FCER1A N01_AAACGGGCATTTCAGG_1 0.000
# 4 CD1A N01_AAACGGGCATTTCAGG_1 0.000
# 5 FSCN1 N01_AAACGGGCATTTCAGG_1 1.104
# 6 CCR7 N01_AAACGGGCATTTCAGG_1 0.000
anno=mye.seu@meta.data[,c("CB","celltype")]
vln.df=inner_join(vln.df,anno,by="CB")
vln.df$gene=factor(vln.df$gene,levels = markerdf2$gene) #為了控制畫圖的基因順序
vln.df%>%ggplot(aes(celltype,exp))+geom_violin(aes(fill=gene),scale = "width")+
facet_grid(vln.df$gene~.,scales = "free_y")+
scale_fill_brewer(palette = "Set3",direction = 1)+
scale_x_discrete("")+scale_y_continuous("")+
theme_bw()+
theme(
axis.text.x.bottom = element_text(angle = 45,hjust = 1,vjust = 1),
panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
legend.position = "none"
)
ggsave("vln2.pdf",width = 11,height = 22,units = "cm")
geom_violin()函數(shù)的scale參數(shù)為"width"時(shí)爆雹,所有小提琴有相同的寬度,默認(rèn)是"area"媒至,有相同的面積顶别;facet_grid()用來分面谷徙,文中用的是多行一列拒啰,scales = "free_y"表示不同行之間可以有不同范圍的y值;scale_fill_brewer()使用ColorBrewer調(diào)色板完慧。
這個(gè)圖的顏色根據(jù)基因來區(qū)分谋旦,有時(shí)可能還會(huì)看到小提琴圖的顏色是用亞群某個(gè)基因的表達(dá)均值來映射的,比如
vln.df$celltype_gene=paste(vln.df$celltype,vln.df$gene,sep = "_")
stat.df=as.data.frame(vln.df%>%group_by(celltype,gene)%>%summarize(mean=mean(exp)))
stat.df$celltype_gene=paste(stat.df$celltype,stat.df$gene,sep = "_")
stat.df=stat.df[,c("mean","celltype_gene")]
vln.df=inner_join(vln.df,stat.df,by="celltype_gene")
vln.df$mean=ifelse(vln.df$mean > 3, 3, vln.df$mean)
#這里的閾值3要提前綜合所有基因看一下
vln.df%>%ggplot(aes(celltype,exp))+geom_violin(aes(fill=mean),scale = "width")+
facet_grid(vln.df$gene~.,scales = "free_y")+
scale_fill_gradient(limits=c(0,3),low = "lightgrey",high = "yellow")+
scale_x_discrete("")+scale_y_continuous("",expand = c(0.02,0))+
theme_bw()+
theme(
panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
axis.text.x.bottom = element_text(angle = 45,hjust = 1,vjust = 1)
)
ggsave("vln3.pdf",width = 11,height = 22,units = "cm")
4. 氣泡圖展示marker基因
Seurat的畫法是這樣的屈尼,
DotPlot(mye.seu, features = markerdf2$gene)+RotatedAxis()+
scale_x_discrete("")+scale_y_discrete("")
#其余的微調(diào)同ggplot2
第二種方法册着,ggplot2代碼如下
bubble.df=as.matrix(mye.seu[["RNA"]]@data[markerdf2$gene,])
bubble.df=t(bubble.df)
bubble.df=as.data.frame(scale(bubble.df))
bubble.df$CB=rownames(bubble.df)
bubble.df=merge(bubble.df,mye.seu@meta.data[,c("CB","celltype")],by = "CB")
bubble.df$CB=NULL
celltype_v=c()
gene_v=c()
mean_v=c()
ratio_v=c()
for (i in unique(bubble.df$celltype)) {
bubble.df_small=bubble.df%>%filter(celltype==i)
for (j in markerdf2$gene) {
exp_mean=mean(bubble.df_small[,j])
exp_ratio=sum(bubble.df_small[,j] > min(bubble.df_small[,j])) / length(bubble.df_small[,j])
celltype_v=append(celltype_v,i)
gene_v=append(gene_v,j)
mean_v=append(mean_v,exp_mean)
ratio_v=append(ratio_v,exp_ratio)
}
}
plotdf=data.frame(
celltype=celltype_v,
gene=gene_v,
exp=mean_v,
ratio=ratio_v
)
plotdf$celltype=factor(plotdf$celltype,levels = sort(unique(plotdf$celltype)))
plotdf$gene=factor(plotdf$gene,levels = rev(as.character(markerdf2$gene)))
plotdf$exp=ifelse(plotdf$exp>3,3,plotdf$exp)
plotdf%>%ggplot(aes(x=celltype,y=gene,size=ratio,color=exp))+geom_point()+
scale_x_discrete("")+scale_y_discrete("")+
scale_color_gradientn(colours = rev(c("#FFD92F","#FEE391",brewer.pal(11, "Spectral")[7:11])))+
scale_size_continuous(limits = c(0,1))+theme_bw()+
theme(
axis.text.x.bottom = element_text(hjust = 1, vjust = 1, angle = 45)
)
ggsave(filename = "bubble2.pdf",width = 9,height = 12,units = c("cm"))
這兩種方法具體函數(shù)定義略有差異,所以氣泡圖看上去不太一樣
到這里脾歧,marker基因的可視化就結(jié)束了甲捏,基本就是這些。如果你覺得上述內(nèi)容對(duì)你有用鞭执,歡迎轉(zhuǎn)發(fā)司顿,點(diǎn)贊!有任何疑問可以在公眾號(hào)后臺(tái)提出兄纺,我都會(huì)回復(fù)的大溜。
因水平有限,有錯(cuò)誤的地方估脆,歡迎批評(píng)指正钦奋!