忙里偷閑，整理了一套繪圖代碼，代碼300行，效果如上圖所示：其實就是把單細胞轉錄組中常見的氣泡圖根據分組信息展開了，代碼目前適用于分組后level數目為2/3/4這三種情況。
如果不分組，就是普通的氣泡圖，之前已經講過幾次了：
- 單細胞分析實錄(9): 展示marker基因的4種圖形(二)
- 單細胞轉錄組繪圖視頻教程
繪圖完整流程如下
1. seurat標準流程
library(Seurat)
library(tidyverse)
library(xlsx)
library(harmony)
### Load data #######################################
# testanno: per-cell annotation table, joined below on its "CB" column.
# testmat: count matrix used to build the Seurat object.
testanno <- readRDS("testanno.rds")
testmat <- readRDS("testmat.rds")

### Standard workflow & QC ##########################
testseu <- CreateSeuratObject(counts = testmat)
# NOTE(review): "^mt-" only matches lowercase-prefixed gene symbols;
# confirm this fits the gene naming of the dataset.
testseu[["percent.mt"]] <- PercentageFeatureSet(testseu, pattern = "^mt-")
testseu@meta.data$sample <- testseu@meta.data$orig.ident
VlnPlot(
  testseu,
  features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),
  group.by = "sample",
  pt.size = 0
)
testseu <- subset(
  testseu,
  subset = nCount_RNA < 40000 &
    nFeature_RNA < 5000 & nFeature_RNA > 500 &
    percent.mt < 10
)
testseu <- NormalizeData(
  testseu,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
testseu <- FindVariableFeatures(
  testseu,
  selection.method = "vst",
  nfeatures = 2000
)
testseu <- ScaleData(testseu)
testseu <- RunPCA(testseu, npcs = 50, verbose = FALSE)

### Harmony batch correction ########################
testseu <- testseu %>% RunHarmony("sample", plot_convergence = TRUE)

### Dimensionality reduction & clustering ###########
testseu <- testseu %>%
  RunUMAP(reduction = "harmony", dims = 1:20) %>%
  FindNeighbors(reduction = "harmony", dims = 1:20) %>%
  FindClusters(resolution = 0.5)

### Add annotation ##################################
# The cell barcode (row name of meta.data) is the join key.
testseu@meta.data$CB <- rownames(testseu@meta.data)

# A missing or duplicated key would fan out or break the join, so fail early
# with a clear message.
if (!"CB" %in% colnames(testanno)) {
  stop("testanno must contain a 'CB' column", call. = FALSE)
}
if (anyDuplicated(testanno$CB) > 0) {
  stop("testanno$CB contains duplicated cell barcodes", call. = FALSE)
}

# left_join keeps one row per cell, in the original order, so meta.data stays
# aligned with the cells of the object even when some cells have no
# annotation (an inner join would silently drop those rows).
unannotated <- !(testseu@meta.data$CB %in% testanno$CB)
if (any(unannotated)) {
  warning(
    sum(unannotated), " cell(s) have no entry in testanno; ",
    "their annotation columns are NA",
    call. = FALSE
  )
}
testseu@meta.data <- testseu@meta.data %>% left_join(testanno, by = "CB")
# dplyr joins drop row names; restore them from the barcode column.
rownames(testseu@meta.data) <- testseu@meta.data$CB

# Side-by-side UMAPs: clusters vs. annotated cell types.
# NOTE(review): assumes testanno supplies a "celltype" column -- confirm.
DimPlot(
  testseu,
  reduction = "umap", group.by = "seurat_clusters",
  pt.size = 1, label = TRUE, repel = TRUE, label.size = 6
) +
  DimPlot(
    testseu,
    reduction = "umap", group.by = "celltype",
    pt.size = 1, label = TRUE, repel = TRUE, label.size = 6
  )

# The raw inputs are no longer needed.
rm(list = c("testanno", "testmat"))
2. 準備差異基因
### Find marker genes for every cell type ######################################
# Fix the cell type order (alphabetical) so the identities have stable levels.
testseu@meta.data$celltype <- factor(
  testseu@meta.data$celltype,
  levels = sort(unique(testseu@meta.data$celltype))
)
Idents(testseu) <- "celltype"
markerdf <- FindAllMarkers(testseu, logfc.threshold = 0.8, only.pos = TRUE)

# Keep the 100 markers with the highest log2 fold change per cluster
# (ties kept), then sort by cluster and descending fold change.
markerdf <- markerdf %>%
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 100, with_ties = TRUE) %>%
  ungroup() %>%
  as.data.frame() %>%
  arrange(cluster, desc(avg_log2FC))
write.csv(
  markerdf,
  file = "celltype_marker_log2fc0.8_top100.csv",
  quote = FALSE,
  row.names = FALSE
)

# Afterwards, pick the genes to plot from this table and save them as
# plotgene.xlsx.
plotgene <- read.xlsx("plotgene.xlsx", sheetIndex = 1, header = TRUE)

### Build artificial grouping information to reproduce the final figure ########
# In a real analysis the grouping must not be made up arbitrarily; it is
# information inherent to the samples of the study.
# For example, this demo dataset has 7 samples:
#   Arep1 Arep2 Arep3 Brep1 Brep2 Brep3 Brep4
# which clearly fall into two groups: Arep and Brep.
# Here three artificial groupings with 2/3/4 levels are created to test the
# applicability of the code; these should cover the needs of most users.

# Fixed seed so the random demo groups (and the resulting figures) are
# reproducible between runs.
set.seed(2024)
n_cells <- nrow(testseu@meta.data)
testseu@meta.data$group1 <- sample(
  c("groupA", "groupB"),
  n_cells,
  replace = TRUE
)
testseu@meta.data$group2 <- sample(
  c("groupA", "groupB", "groupC"),
  n_cells,
  replace = TRUE
)
testseu@meta.data$group3 <- sample(
  c("groupA", "groupB", "groupC", "groupD"),
  n_cells,
  replace = TRUE
)

# Reassign the groupD cells of cell_B to groupC, leaving cell_B without any
# groupD cells. which() drops NA comparisons, so cells with a missing
# celltype are left untouched.
# NOTE(review): presumably done to exercise a cell type that lacks one group
# level -- confirm.
cell_b_in_group_d <- which(
  testseu@meta.data$group3 == "groupD" &
    testseu@meta.data$celltype == "cell_B"
)
testseu@meta.data$group3[cell_b_in_group_d] <- "groupC"
3. 繪制分組氣泡圖
# Required inputs:
#   - a Seurat object whose meta data frame has one column holding the cell
#     type and one column holding the group;
#   - the order of the cell types;
#   - a data frame of marker genes with at least two columns, one named
#     "cluster" and one named "gene", with no duplicated gene names.
source("bubble_split.R")

# Two-level grouping (group1), red rectangles.
bubble_split(
  seu.obj = testseu,
  celltype.column = "celltype",
  celltype.order = levels(Idents(testseu)),
  group.column = "group1",
  deg = plotgene,
  rect_color = "red",
  rect_size = 1
)
ggsave(
  "bubble_分兩組.pdf",
  width = 40,
  height = 7,
  units = "cm"
)

# Four-level grouping (group3), thinner black rectangles, taller page.
bubble_split(
  seu.obj = testseu,
  celltype.column = "celltype",
  celltype.order = levels(Idents(testseu)),
  group.column = "group3",
  deg = plotgene,
  rect_color = "black",
  rect_size = 0.5
)
ggsave(
  "bubble_分四組.pdf",
  width = 40,
  height = 11,
  units = "cm"
)
獲取代碼
淘寶店鋪：TOP生物信息