前言
最近小Q在做自然選擇分析匣吊,分析完之后簡單粗暴的對候選基因做了富集分析,并做了展示蹬碧,比起氣泡圖,我模仿了另一種作圖方式炒刁,顯示效果更佳恩沽。所以想在此分享一下如何用R語言畫富集分析示意圖(非氣泡圖)。
畫圖思路
利用ggplot2+grid包進行畫圖翔始,采用分面的思想作圖罗心。
畫圖代碼
#導入所需R包
library(ggplot2)
library(grid)
#--------------------------------------
# 數(shù)據(jù)讀入與預處理
#--------------------------------------
options(stringsAsFactors = FALSE)
dat<-read.table("exampleData2.txt",header = TRUE,sep = "\t")
#---------------
# 輸入文件格式:Term Inputnumber Backgroundnumber PValue label
# Inputnumber: 你的基因列表和Term基因列表重疊的基因數(shù)目
# label:是你想特別強調(diào)的Trem,1表示特別強調(diào)城瞎,0表示不特別強調(diào)
Term Inputnumber Backgroundnumber PValue label
Plasma omega-6 10 689 0.000465508 0
Verbal declarative memory 9 340 0.000670600 0
Phospholipid levels 8 170 0.001023562 1
Temperament 12 158 0.002094460 0
Red blood cell fatty acid levels 11 43 0.003176734 1
Oleic acid plasma levels 7 25 0.003248211 1
Aortic root size 7 183 0.003264350 0
#---------------
# 按照GO的pval排序渤闷,從小到大
dat<-dat[order(dat$PValue),]
# 給排序后的Term編號,用作X-axis
group<-c(1:nrow(dat))
dat<-cbind(group,dat)
# 將label設為因子變量脖镀,便于fill
dat$label<-factor(dat$label)
#--------------------------------------
# 作圖
#--------------------------------------
# 通用參數(shù)
# 畫圖區(qū)域外的空白框設置
mg_l <- margin(0, 0, 0, 0, 'lines') # 方向: 上右下左
# 先分別用ggplot2畫好每一個子圖
# logP
mg_1 <- unit(c(0, 0, 0.3, 0), "lines") # 方向: 上右下左
# 之所以求logP而不是-logP飒箭,是為了讓bar是從頂部往下繪制,最終呈現(xiàn)圖中向左繪制的效果
# 因為logP是負值蜒灰,所以X tick的label需要自定義重新設置label
limit_1 <- c(min(log10(dat$PValue))-0.5, 0)
break_1 <- seq(from = 0, to = min(log10(dat$PValue)), by = -1)
label_1 <- seq(from = 0, to = abs(min(log10(dat$PValue))), by = 1)
# 之所以在這里第一個label設為空弦蹂,是為了后面拼圖時原點只有一個0,而不是兩個0,更好看些
# 在geneCount的label里第一個label不會設置為空
label_1[1]<-""
# 這里想要強調(diào)的條目設置成紅色,更醒目
# 這里顏色的fill = label
fillCol<-c(rgb(244,153,30,max=255),rgb(255,0,0,max=255))
fillCol_brk<-c(0,1)
# limits = c(0.5, (nrow(dat) + 0.5)), breaks = 1:nrow(dat)
# 為了后面統(tǒng)一各個子圖上term位置的一致性
p_logP <- ggplot(dat) +
geom_bar(aes(x=group, y=log10(PValue),fill=label), stat="identity", position="dodge",width = 0.5) +
scale_y_continuous(limits = limit_1, breaks = break_1,labels = label_1,expand = expand_scale()) +
scale_x_continuous(limits = c(0.5, (nrow(dat) + 0.5)), breaks = 1:nrow(dat),labels = NULL, expand = expand_scale(), position = 'top') +
scale_fill_manual(values = fillCol,breaks=fillCol_brk)+
theme(plot.margin = mg_1, axis.text = element_text(margin = mg_l),axis.ticks.y = element_blank()) +
xlab(NULL) +
ylab("-Log10(Pvalue)") +
coord_flip() +
guides(fill = FALSE)
# GeneCount
mg_2 <- unit(c(0, 0, 0.3, 0), "lines") # 方向: 上右下左
max_geneNum<-max(dat$Inputnumber)
limit_2 <- c(0,max_geneNum+1)
dig_temp<-nchar(as.character(max_geneNum))
by <- round(max_geneNum/(5 * 10^(dig_temp - 2))) * 10^(dig_temp - 2)
break_2 <- seq(from = 0, to = max_geneNum, by = by)
# 這里設置label是為了更美觀乡小,label沒有小數(shù)點
label_2 <- seq(from = 0, to = max_geneNum, by = by)
p_geneCount <- ggplot(dat) +
geom_bar(aes(x=group, y=Inputnumber), fill="#696969",stat="identity", position="dodge",width = 0.5) +
geom_text(aes(x=group, y=Inputnumber,label=Inputnumber),hjust=-0.5)+
scale_y_continuous(limits = limit_2,breaks = break_2,labels = label_2,expand = expand_scale()) +
scale_x_continuous(limits = c(0.5, (nrow(dat) + 0.5)), breaks = 1:nrow(dat),labels = NULL, expand = expand_scale(), position = 'top') +
theme(plot.margin = mg_2, axis.text = element_text(margin = mg_l),axis.ticks.y = element_blank()) +
xlab(NULL) +
ylab("No. of genes") +
coord_flip() +
guides(fill = FALSE)
# Function term
text_x <- rep(1, nrow(dat))
text_y <- 1:nrow(dat)
text_lab<-dat$Term
mg_3 <- unit(c(0, 0.3, 2.3, 0), "lines") # 方向: 上右下左
p_GoName <- ggplot() +
geom_text(aes(x = text_x, y= text_y, label = text_lab), hjust=1,size = 4) +
scale_x_continuous(limits = c(0,1), breaks = NULL, expand = expand_scale()) +
scale_y_continuous(limits = c(0.5, (nrow(dat) + 0.5)), breaks = NULL, expand = expand_scale()) +
labs(x = NULL, y = NULL) +
theme(plot.margin = mg_3,
axis.text = element_text(margin = mg_l),
panel.grid.major =element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank())
# 分面畫圖
# 寫一個簡易函數(shù)調(diào)用viewport
vplayout <- function(x, y){
viewport(layout.pos.row = x, layout.pos.col = y)
}
grid.newpage() ##新建頁面
pushViewport(viewport(layout = grid.layout(1, 3)))
# goTerm logP GeneNum
print(p_GoName, vp = vplayout(1, 1))
print(p_logP, vp = vplayout(1, 2))
print(p_geneCount, vp = vplayout(1, 3))
最終效果圖:
轉(zhuǎn)載請標明出處和作者 ^+^
撰文 & 編輯:VickieQ
校對:HCLO4 & 花毛