導讀
宏基因組分析分為物種分析和功能分析兩大塊。物種組成分析是物種分析中最基本最常見的分析方法。利用R語言堆疊圖,我們可以將一個項目中所有樣品的物種組成展示出來。下面介紹如何利用R語言進行物種組成分析和可視化。過程分為以下幾步:1)模擬豐度矩陣;2)模擬分組;3)標準化豐度;4)調整格式;5)ggplot2繪制堆疊圖、沖積圖、分面、分組、堆疊面積圖。
1 模擬豐度矩陣
# Fix the random seed so the simulated counts are reproducible.
set.seed(1995)
# Simulated count table: 200 draws from N(1000, 500), rounded and made
# non-negative, arranged as 20 rows (samples) by 10 columns (species).
data <- matrix(
  abs(round(rnorm(200, mean = 1000, sd = 500))),
  nrow = 20,
  ncol = 10,
  dimnames = list(
    paste0("Sample.", 1:20),   # row names: Sample.1 ... Sample.20
    paste0("Species.", 1:10)   # column names: Species.1 ... Species.10
  )
)
# `data` is now the sample-by-species abundance matrix.
2 模擬分組
# Group labels: the first ten samples belong to "A", the last ten to "B".
group <- rep(c("A", "B"), each = 10)
# Sample identifiers are taken from the row names of the abundance matrix.
sample_id <- rownames(data)
# Grouping table with one row per sample (columns: sample_id, group).
data_group <- data.frame(sample_id = sample_id, group = group)
3 標準化豐度
# Convert raw counts to relative abundances so that every sample (row) sums
# to 1.
#
# The row totals are computed once; the original recomputed them on every
# iteration of a loop hard-coded to 20 x 10. Working on the whole matrix
# also makes the step independent of the matrix size.
# Note: a sample whose total is 0 yields NaN for that row.
sample_sum <- rowSums(data)
# Divide each row i by sample_sum[i]; dimnames of `data` are preserved.
data_norm <- sweep(data, 1, sample_sum, "/")
4 調整格式
# reshape2 provides melt() for wide-to-long conversion.
library(reshape2)
# Species names come from the columns of the abundance matrix.
Taxonomy <- colnames(data)
# Steps, from the inside out:
#   1. transpose the normalised matrix (species in rows, samples in columns)
#      and attach the species names as a Taxonomy column;
#   2. melt to long format, one row per (Taxonomy, sample) pair, naming the
#      sample column `sample_id` so it matches the key in `data_group`;
#   3. merge in the group of every sample via that shared key.
data_frame <- merge(
  melt(
    data.frame(t(data_norm), Taxonomy = Taxonomy),
    id.vars = "Taxonomy",
    variable.name = "sample_id"
  ),
  data_group,
  by = "sample_id"
)
# `data_frame` now has the columns sample_id, Taxonomy, value and group.
5 ggplot2繪制堆疊圖
1 普通堆疊圖
geom_col(position = 'stack'),y軸展示原始計數
geom_col(position = 'fill'),y軸展示菌豐度除以其在各樣本中的菌總豐度
library(ggplot2)
# Plain stacked bar chart: one bar per sample, one filled segment per taxon.
# `value` is multiplied by 100 so the y axis reads as a percentage.
stack_plot <- ggplot(
  data_frame,
  aes(x = sample_id, y = value * 100, fill = Taxonomy)
) +
  # position = "stack" piles the taxa of a sample on top of each other.
  geom_col(position = "stack") +
  # No padding around the y range, so the bars start on the axis line.
  scale_y_continuous(expand = c(0, 0)) +
  # Axis titles.
  labs(x = "Samples", y = "Relative Abundance (%)") +
  # angle rotates the x tick labels; hjust = 1 right-justifies them so the
  # end of each rotated label sits under its tick.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Write the chart to a PDF in the working directory.
ggsave(filename = "stack_plot.pdf", plot = stack_plot)
2 拆成柱形圖
geom_bar()和geom_col()都可以完成堆疊圖和柱形圖
position=position_dodge(0)默認值為0,即默認繪制堆疊圖,如果position_dodge > width則能拆開堆疊圖得到分組柱形圖。
# Grouped (dodged) bar chart: the taxa of one sample are drawn side by side
# instead of stacked.
# Fix: y is scaled by 100 so the plotted numbers agree with the
# "Relative Abundance (%)" axis title (the original plotted raw fractions
# under a percent label).
stack_plot <- ggplot(
  data_frame,
  aes(x = sample_id, y = value * 100, fill = Taxonomy)
) +
  # geom_col() is geom_bar(stat = "identity"). A dodge width (0.75) larger
  # than the bar width (0.5) leaves a gap between neighbouring bars.
  geom_col(position = position_dodge(0.75), width = 0.5) +
  # Axis titles.
  labs(x = "Samples", y = "Relative Abundance (%)") +
  # No padding around the y range, so the bars start on the axis line.
  scale_y_continuous(expand = c(0, 0)) +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset it.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Wider page, because the dodged bars need more horizontal room.
ggsave(filename = "stack_plot.pdf", plot = stack_plot, width = 14)
3 添加沖積圖
geom_bar(stat='identity') # 同樣可以做堆疊圖
geom_alluvium() # 添加沖積圖
geom_stratum(width=0.45, size=0.1) # 添加階層,下圖中的黑線
安裝依賴:
# Install the packages needed for alluvial plots, then attach ggalluvial.
#
# install.packages() has no `version` argument, so the original
# `version = "0.4.7"` did not pin anything.
# NOTE(review): 0.4.7 is read here as a minimum rlang version - confirm.
# system.file() and packageVersion() inspect the library without loading
# the package, so rlang can still be upgraded at this point. If rlang is
# already loaded in the session, restart R after the upgrade.
if (!nzchar(system.file(package = "rlang")) ||
    packageVersion("rlang") < "0.4.7") {
  install.packages("rlang")
}
# Only download ggalluvial when it is not installed yet.
if (!nzchar(system.file(package = "ggalluvial"))) {
  install.packages("ggalluvial")
}
library("ggalluvial")
# Show the rlang version that is installed.
packageVersion("rlang")
繪制沖積圖:
# Stacked bars with alluvial ribbons linking each taxon across samples.
# stratum and alluvium are both mapped to Taxonomy for the ggalluvial layers.
stack_plot <- ggplot(
  data_frame,
  aes(
    x = sample_id,
    y = value * 100,
    fill = Taxonomy,
    stratum = Taxonomy,
    alluvium = Taxonomy
  )
) +
  # Bars of width 0.45 (stat = "identity" plots the values as given).
  geom_bar(stat = "identity", width = 0.45) +
  # Ribbons between neighbouring bars.
  geom_alluvium() +
  # Strata drawn over the bars with a thin outline.
  geom_stratum(width = 0.45, size = 0.1) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "Samples", y = "Relative Abundance (%)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave(filename = "stack_plot.pdf", plot = stack_plot)
4 添加facet_wrap分面
facet_wrap(~group, scales = 'free_x', ncol = 2) # 按group組,X軸,分2面
# Alluvial stacked chart split into one panel per group.
stack_plot <- ggplot(
  data_frame,
  aes(
    x = sample_id,
    y = value * 100,
    fill = Taxonomy,
    stratum = Taxonomy,
    alluvium = Taxonomy
  )
) +
  geom_col(position = "stack") +
  geom_alluvium() +
  geom_stratum(width = 0.45, size = 0.1) +
  # One panel per group, laid out in two columns; scales = "free_x" gives
  # each panel its own x scale.
  facet_wrap(~group, scales = "free_x", ncol = 2) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "Samples", y = "Relative Abundance (%)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave(filename = "stack_plot.pdf", plot = stack_plot)
5 添加geom_segment分組標記
數據準備:準備geom_segment需要的x_start、x_end值
# Horizontal extent of the group marker drawn above every bar.
# The sample number is the second "."-separated piece of the identifier in
# the first column ("Sample.7" -> 7); the marker spans half a unit on either
# side of it.
#
# Computed for all rows at once. The original grew two vectors with c()
# inside a 1:nrow() loop, which is quadratic and fails on a data frame with
# zero rows.
sample_parts <- strsplit(as.character(data_frame[[1]]), split = "\\.")
sample_number <- as.numeric(
  vapply(sample_parts, function(parts) parts[2], character(1))
)
x_start <- sample_number - 0.5
x_end <- sample_number + 0.5
# Append both coordinates as new columns.
data_frame <- data.frame(data_frame, x_start, x_end)
繪圖:
# Alluvial stacked chart with a coloured strip above the bars marking the
# group of each sample.
stack_plot <- ggplot(
  data = data_frame,
  mapping = aes(
    x = sample_id,
    y = value * 100,
    fill = Taxonomy,
    stratum = Taxonomy,
    alluvium = Taxonomy
  )
) +
  geom_col(position = "stack") +
  geom_alluvium() +
  geom_stratum(width = 0.45, size = 0.1) +
  # Group marker: a thick horizontal segment at y = 105, running from
  # x_start to x_end and coloured by group.
  geom_segment(
    mapping = aes(
      x = x_start,
      xend = x_end,
      y = 105,
      yend = 105,
      color = group
    ),
    size = 5
  ) +
  scale_y_continuous(
    # Axis range: extends past 100 to leave room for the marker strip.
    limits = c(0, 115),
    # No extra padding beyond the limits.
    expand = c(0, 0),
    # Tick marks to display.
    breaks = c(0, 20, 40, 60, 80, 100)
  ) +
  labs(x = "Samples", y = "Relative Abundance (%)") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset it.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave(filename = "stack_plot.pdf", plot = stack_plot)
6 翻轉90度
facet_wrap(~group, scales = 'free_y', ncol = 2) # 按group組,Y軸,分2面
coord_flip() # 旋轉90度
# Faceted alluvial stacked chart, rotated by 90 degrees.
stack_plot <- ggplot(
  data_frame,
  aes(
    x = sample_id,
    y = value * 100,
    fill = Taxonomy,
    stratum = Taxonomy,
    alluvium = Taxonomy
  )
) +
  geom_col(position = "stack") +
  geom_alluvium() +
  geom_stratum(width = 0.45, size = 0.1) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "Samples", y = "Relative Abundance (%)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # One panel per group in two columns, with scales = "free_y".
  facet_wrap(~group, scales = "free_y", ncol = 2) +
  # Swap the axes so the bars run horizontally.
  coord_flip()
ggsave(filename = "stack_plot.pdf", plot = stack_plot)
7 繪制堆疊面積圖
數據準備:給每個樣品按數字編號
# Numeric sample index for the continuous x axis of the area chart.
# The number is parsed from the sample name ("Sample.7" -> 7) rather than
# assumed from the row order: merge() returns rows sorted on the key as text
# ("Sample.1", "Sample.10", "Sample.11", ...), so rep(1:20, each = 10) would
# attach the wrong number to most samples.
id <- as.integer(sub("^.*\\.", "", as.character(data_frame$sample_id)))
# Append the index as a new column.
data_frame <- data.frame(data_frame, id)
繪圖:
# Stacked area chart over the numeric sample index.
stack_plot <- ggplot(
  data_frame,
  aes(x = id, y = value * 100, fill = Taxonomy)
) +
  # Filled areas, one per taxon.
  geom_area() +
  # One tick per sample, labelled 1..20, with no padding on either axis.
  scale_x_continuous(
    breaks = 1:20,
    labels = as.character(1:20),
    expand = c(0, 0)
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "Samples", y = "Relative Abundance (%)") +
  # Background: no grid lines, transparent panel with a black border.
  theme(
    panel.grid = element_blank(),
    panel.background = element_rect(color = "black", fill = "transparent")
  )
ggsave(filename = "stack_plot.pdf", plot = stack_plot)
這配色似乎還可以
# Example styling for a horizontal stacked bar chart.
# `input` is not defined in this part of the file; it needs the columns
# name, value and variable.
# NOTE(review): brewer.pal() presumably comes from RColorBrewer, which is
# not attached in the visible code - confirm it is loaded.
ggplot(input, aes(x = name, y = value, fill = variable)) +
  geom_col(position = "stack") +
  # Six colours from the "Set2" palette.
  scale_fill_manual(values = brewer.pal(6, "Set2")) +
  labs(x = "Phyla", y = "CAZyme genes per genome", fill = "CAZymes") +
  # Swap the axes so the bars run horizontally.
  coord_flip() +
  theme_classic() +
  # All tweaks on top of theme_classic(), gathered into one call.
  theme(
    legend.text = element_text(size = 15),
    legend.title = element_text(face = "bold", size = 20),
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 18),
    axis.line = element_line(size = 1),
    axis.ticks = element_line(size = 1),
    text = element_text(family = "serif")
  )
一組好看的堆疊圖參數:
# Example styling for a stacked relative-abundance chart.
# `input`, `colors` and `col_text` are not defined in this part of the file
# and must exist before this runs.
#
# Fix: the plot is now stored in `result`, the object the ggsave() calls
# below write out; the original built the plot without assigning it.
result <- ggplot(input, aes(x = variable, y = value * 100, fill = Genus)) +
  geom_col(position = "stack") +
  theme_classic() +
  # Fill colours taken from `colors`.
  scale_fill_manual(values = colors) +
  theme(
    legend.text = element_text(size = 15),
    legend.title = element_text(face = "bold", size = 20)
  ) +
  labs(x = "", y = "Relative abundance", fill = "Genus") +
  theme(title = element_text(size = 15, face = "bold")) +
  # No padding around the y range, so the bars start on the axis line.
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    axis.title = element_text(size = 25),
    axis.text.y = element_text(size = 18),
    axis.line = element_line(size = 1),
    axis.ticks = element_line(size = 1)
  ) +
  # Rotated x labels, coloured by `col_text`.
  theme(
    axis.text.x = element_text(
      angle = 60,
      hjust = 1,
      size = 20,
      color = col_text
    )
  )
# Show the chart, as the original top-level expression did.
print(result)
# Save as PNG and PDF; `filename` is spelled out instead of relying on
# partial matching of `file`.
ggsave(filename = "phylum_stack.png", plot = result, width = 10)
ggsave(filename = "phylum_stack.pdf", plot = result, width = 10)
參考:
R語言ggplot2繪制分組箱型圖和分組柱狀圖
Make Grouped Boxplots with ggplot2