據(jù)說莺戒,多種R包可以繪制雷達(dá)圖,對(duì)ggplot系列函數(shù)熟悉的小伙伴可以選擇ggradar
和echarts4r
急波。fmsb
和radarchart
也同樣能實(shí)現(xiàn)你的需求
下面惰匙,從單細(xì)胞差異分析入手冠王,直到GO富集分析
1.單細(xì)胞差異分析
FindAllMarkers
# 6. FindAllMarkers celltype----
sce
# An object of class Seurat
# 25288 features across 13560 samples within 1 assay
# Active assay: RNA (25288 features, 2000 variable features)
# 4 dimensional reductions calculated: pca, tsne, umap, harmony
table(sce$celltype)
Idents(sce) <- "celltype"
Idents(sce)
sce.markers <- FindAllMarkers(sce)
save(sce.markers,file = "harmony_celltype.markers.Rdata")
write.csv(sce.markers,file = "harmony_celltype.markers.csv")
2. 挑top100
head(harmony_celltype_markers)
# p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
# FCER1A 0.000000e+00 2.016015 0.734 0.126 0.000000e+00 Inflammatory_DC FCER1A
# CD1C 0.000000e+00 1.647918 0.503 0.064 0.000000e+00 Inflammatory_DC CD1C
# IL1R2 1.776186e-291 1.655928 0.567 0.099 4.491619e-287 Inflammatory_DC IL1R2
# CD1E 3.616531e-250 1.093357 0.333 0.036 9.145483e-246 Inflammatory_DC CD1E
# NAPSB 3.350806e-242 1.358380 0.521 0.097 8.473517e-238 Inflammatory_DC NAPSB
# NR4A3 5.633118e-232 1.374079 0.625 0.145 1.424503e-227 Inflammatory_DC NR4A3
sce.markers <- harmony_celltype_markers
DT::datatable(sce.markers)
# 對(duì)每一個(gè)cluster登颓,根據(jù) avg_log2FC惧磺,取前100個(gè)基因
top100 <- sce.markers %>% group_by(cluster) %>% top_n(100, avg_log2FC)
head(top100)
dim(top100)
#1000 7
3. GO富集分析
-
bitr
: 轉(zhuǎn)換ID
# GO富集分析
# https://mp.weixin.qq.com/s/z-Kk02I2g8vAESgcGuncUg
library(clusterProfiler)
# 挑選p_val<0.05
de_genes <- subset(top100, p_val<0.05)
length(de_genes$gene) # 1000
head(de_genes)
# 轉(zhuǎn)化ID
entrez_genes <- bitr(de_genes$gene, fromType="SYMBOL",
toType="ENTREZID",
OrgDb="org.Hs.eg.db") # 人
# 5.45% of input gene IDs are fail to map...
head(entrez_genes)
# SYMBOL ENTREZID
# 1 FCER1A 2205
# 2 CD1C 911
# 3 IL1R2 7850
# 4 CD1E 913
# 5 NAPSB 256236
# 6 NR4A3 8013
# 將原來的SYMBOL 和 ENTREZID 對(duì)應(yīng)起來
mix <- merge(de_genes,entrez_genes,by.x="gene",by.y="SYMBOL")
dim(mix) #950 8
head(mix)
# gene p_val avg_log2FC pct.1 pct.2 p_val_adj cluster ENTREZID
# 1 A2M 4.322616e-68 0.4559287 0.402 0.183 1.093103e-63 TREM2_Mf 2
# 2 A2M 2.036888e-05 0.5190426 0.210 0.200 5.150881e-01 CCR2- HLA-DRhi C1 2
# 3 ABCA6 7.529550e-58 0.7018329 0.127 0.052 1.904073e-53 CCR2- HLA-DRhi C1 23460
# 4 ABI3 1.146337e-237 1.3451063 0.489 0.106 2.898856e-233 CD16+ Mono 51225
# 5 ABL2 0.000000e+00 1.3000718 0.501 0.180 0.000000e+00 CCR2+ HLA-DRhi C2 27
# 6 ACAP1 2.340444e-179 0.9188265 0.333 0.047 5.918515e-175 Mast cell 9744
-
merge
: 整合 -
split
: 拆分 -
compareCluster
: 富集分析(多個(gè)clusters)
# 整合起來
de_gene_clusters <- data.frame(ENTREZID=mix$ENTREZID,
cluster=mix$cluster)
table(de_gene_clusters$cluster)
# 只挑選其中3個(gè)cluster作富集
de_gene_clusters <- de_gene_clusters[(de_gene_clusters$cluster == 'CCR2- HLA-DRhi C1'|
de_gene_clusters$cluster=='CCR2+ HLA-DRhi C2'|
de_gene_clusters$cluster== 'CCR2+ HLA-DRhi C3'),]
table(de_gene_clusters$cluster)
# CCR2- HLA-DRhi C1 CCR2+ HLA-DRhi C2 CCR2+ HLA-DRhi C3
# 96 98 95
# 拆分成list
list_de_gene_clusters <- split(de_gene_clusters$ENTREZID,de_gene_clusters$cluster)
因?yàn)槲蚁旅婵梢暬轻槍?duì)某些通路,所以這里pvalueCutoff
,qvalueCutoff
都調(diào)的很大赏寇,盡可能富集出感興趣的通路出來
# 多個(gè)clusters富集分析
formula_res <- compareCluster(
ENTREZID~cluster,
data=de_gene_clusters,
fun="enrichGO",
OrgDb="org.Hs.eg.db",
ont = "ALL", # GO富集類型,One of "BP", "MF", and "CC" or "ALL"
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff =1,
minGSSize = 5, # 最小的基因富集數(shù)量
maxGSSize = 1000) # 最大的基因富集數(shù)量
head(formula_res)
f=as.data.frame(formula_res)
dim(f) #9805 12
- 挑選感興趣的通路
enrich <- c("response to oxidative stress",
"response to endoplasmic reticulum stress",
"interleukin-10 production",
'antigen processing and presentation',
'endocytosis',
'response to tumor necrosis factor',
'I-kappaB kinase/NF-kappaB signaling',
'response to interferon-gamma',
'cytokine production',
'leukocyte chemotaxis')
choose_f <- f[(f$Description %in% enrich),]
table(choose_f$Description)
df=choose_f
4. 氣泡圖
p=ggplot(df,aes(x = cluster,
y = Description, # 按照富集度大小排序
size = Count,
colour=p.adjust)) +
geom_point(shape = 16)+ # 設(shè)置點(diǎn)的形狀
labs(x = "Ratio", y = "Pathway")+ # 設(shè)置x吉嫩,y軸的名稱
scale_colour_continuous( # 設(shè)置顏色圖例
name="Enrichment", # 圖例名稱
low="#7fcdbb", # 設(shè)置顏色范圍
high="#fc9272")+
scale_radius( # 設(shè)置點(diǎn)大小圖例
range=c(5,9), # 設(shè)置點(diǎn)大小的范圍
name="Size")+ # 圖例名稱
guides(
color = guide_colorbar(order = 1), # 決定圖例的位置順序
size = guide_legend(order = 2)
)+theme_bw() # 設(shè)置主題
p
- 如果名字太長(zhǎng),可以換行顯示
p + scale_y_discrete(labels=function(x) str_wrap(x, width=12)) +
scale_x_discrete(labels=function(x) str_wrap(x, width=10)) # 名字換行
5. 雷達(dá)圖
-
dcast
: 先換成短矩陣
ff <- choose_f[,c("Cluster","Description","Count")]
head(ff)
# Cluster Description Count
# 2 CCR2- HLA-DRhi C1 endocytosis 17
# 51 CCR2- HLA-DRhi C1 response to tumor necrosis factor 7
# 74 CCR2- HLA-DRhi C1 response to interferon-gamma 5
# 93 CCR2- HLA-DRhi C1 leukocyte chemotaxis 6
# 128 CCR2- HLA-DRhi C1 antigen processing and presentation 4
# 785 CCR2- HLA-DRhi C1 response to oxidative stress 5
# 長(zhǎng)數(shù)據(jù)換成短數(shù)據(jù)形式
dd=dcast(ff,Cluster~Description,value.var = 'Count')
- 調(diào)整格式
rownames(dd) <- dd[,1]
dd <- dd[,-1]
# 第一行包含每個(gè)變量的最大值
# 第二行包含每個(gè)變量的最小值
dd=rbind(rep(20,5) , rep(0,5) , dd)
# 變量需要換成數(shù)值形式
for (i in 1:10) {
#i=1
dd[,i] <- as.numeric(dd[,i])
}
str(dd)
# 'data.frame': 5 obs. of 10 variables:
# $ antigen processing and presentation : num 20 0 4 1 3
# $ cytokine production : num 20 0 7 15 17
# $ endocytosis : num 20 0 17 4 4
# $ I-kappaB kinase/NF-kappaB signaling : num 20 0 1 6 4
# $ interleukin-10 production : num 20 0 1 1 NA
# $ leukocyte chemotaxis : num 20 0 6 11 10
# $ response to endoplasmic reticulum stress: num 20 0 2 6 2
# $ response to interferon-gamma : num 20 0 5 4 3
# $ response to oxidative stress : num 20 0 5 12 8
# $ response to tumor necrosis factor : num 20 0 7 14 3
-
radarchart
: 終于到畫圖了
library(fmsb)
colnames(dd)
# 有些名字太長(zhǎng)了嗅定,需要調(diào)整下距離
# https://stackoverflow.com/questions/69306607/how-to-move-radar-chart-spider-chart-labels-in-r-fmsb-for-r-shiny-so-labels-do
f=c("antigen processing and presentation" ,"cytokine production" ,
"endocytosis" ," NF-kappaB
signaling",
"interleukin-10
production" ,"leukocyte chemotaxis" ,
" response to
endoplasmic reticulum stress" ,
" response to
interferon-gamma",
" response to
oxidative stress" ,
" response to
tumor necrosis factor" )
# https://zhuanlan.zhihu.com/p/363992240
{cairo_pdf(file = "mouse-heart/fig8_radarchart.pdf",
width = 20,
height = 10,
family = "STSong")
#?radarchart
radarchart(dd,
axistype=0, # 坐標(biāo)軸的類型
vlcex=2, # 標(biāo)簽大小
palcex=5, # 軸四周字體大小縮放比例
pcol=colors_border , #設(shè)置顏色
pfcol=colors_in , # 內(nèi)部填充色
plwd=1.3 , #線條粗線
plty=1,#虛線自娩,實(shí)線
vlabels = c(f), # 標(biāo)簽
pty=32, # 點(diǎn)的形狀
cglcol="black", # 雷達(dá)圖網(wǎng)絡(luò)格顏色
cglty=3, #背景線條虛線,實(shí)線
cglwd=0.6 #背景線條粗細(xì)
)
legend(x=1.5, y=0.90, legend = rownames(dd[-c(1,2),]),
bty = "n",
pch=20 ,
col=colors_border,
text.col = "black",
cex=1.5, pt.cex=2)
dev.off()
}