Fst值的計(jì)算
群體間分化指數(shù)fst,取值范圍0~1,值越大表明群體間分化程度越高位他,親緣關(guān)系越遠(yuǎn)氛濒。
#按照窗口模式計(jì)算(更準(zhǔn)確)
vcftools --vcf Filter.snp.vcf --weir-fst-pop 1-population.txt --weir-fst-pop 2-population.txt --out p1_p2_window --fst-window-size 500000 --fst-window-step 50000
vcftools --vcf Filter.snp.vcf --weir-fst-pop 1-population.txt --weir-fst-pop 3-population.txt --out p1_p3_window --fst-window-size 500000 --fst-window-step 50000
vcftools --vcf Filter.snp.vcf --weir-fst-pop 2-population.txt --weir-fst-pop 3-population.txt --out p2_p3_window --fst-window-size 500000 --fst-window-step 50000
#參數(shù)
#weir-fst-pop 需要計(jì)算的群體ID名,與vcf文件里的群體ID名一致鹅髓,該文件必須是txt格式舞竿,每個(gè)ID占一行
#--fst-window-size --fst-window-step 設(shè)置窗口大小和步長(zhǎng)的參數(shù),按自己的需求而定
#根據(jù)加權(quán)fst為Fst排序
sort -k 5 -g p1_p2_window.windowed.weir.fst > p1_p2.sorted.fst
#窗口計(jì)數(shù)
wc -l p1_p2.sorted.fst #假設(shè)為20000
#取前1%
tail -n 200 p1_p2.sorted.fst > p1_p2.sorted.1%.fst
#找出前1%中最小的加權(quán)fst值
這個(gè)窗口大小和步長(zhǎng)不影響結(jié)果
曼哈頓圖
#處理數(shù)據(jù)文件格式(sed '1d' 去掉表頭窿冯,awk 如果加權(quán)FST<0,則取0
sed '1d' p1_p2_window.windowed.weir.fst|awk '{if($5<0)print $1"\t"$2"\t0";else print $1"\t"$2"\t"$5}' > p1_p2_fst.txt
首先上傳數(shù)據(jù)骗奖,每列數(shù)據(jù)類型都為數(shù)值型
library(qqman)
filt=data.frame(p1_p2_fst)
SNP <- paste(filt[,1],filt[,2],sep = ":")
Fstfile <- cbind(SNP,filt)#合并
colnames(Fstfile)<-c("SNP","CHR","POS","FST")#添加列名
colorset<-c("#FF0000","#FFD700","#2E8B57","#7FFFAA","#6495ED","#0000FF")
manhattan(Fstfile,chr="CHR",bp="POS",p="FST",snp="SNP",col=colorset,logp=F,genomewideline=0.9,ylab="Fst",font.lab=2,cex.lab=1,main="G1vsG2",cex=0.8)
#genomewideline=0.9 前1%的最小值
計(jì)算總體PI值和TajimaD
核酸多樣性PI,值越大說(shuō)明核苷酸多樣性越高
TajimaD用于檢驗(yàn)DNA序列在演化國(guó)產(chǎn)過(guò)程中是否遵循中性演化模型。D>0:平衡選擇执桌,突然群體收縮鄙皇;D<0:最近的選擇性清除,最近瓶頸后的群體擴(kuò)張仰挣,與消除基因連鎖伴逸;D=0:群體根據(jù)突變-漂移平衡演變,沒(méi)有選擇膘壶。
vcftools --vcf Filter.snp.vcf --window-pi 500000 --out total
vcftools --vcf Filter.snp.vcf --TajimaD 500000 --out total
計(jì)算每個(gè)亞群的PI值和TajimaD值
#創(chuàng)建bash腳本
vi test.sh
#寫(xiě)入以下內(nèi)容:
for i in {1..3};do
#根據(jù)ID错蝴,從vcf文件中提取每個(gè)亞群的信息
vcftools --vcf Filter.snp.vcf --keep ${i}-population.txt --recode --
recode-INFO-all --out p${i}
#根據(jù)提取的信息計(jì)算每個(gè)亞群的PI值
vcftools --vcf p${i}.recode.vcf --out q${i}_pi_500kb --window-pi
500000 --window-pi-step 50000
#根據(jù)提取的信息計(jì)算每個(gè)亞群的TajimaD值
vcftools --vcf p${i}.recode.vcf --out q${i}_500kb.TajimaD --TajimaD
500000
done
#給bash腳本添加可執(zhí)行權(quán)限
chmod +x test.sh
#運(yùn)行腳本
./test.sh
根據(jù)pi值畫(huà)箱線圖和小提琴圖
#添加分組信息G1、G2颓芭、G3
data=data.frame(q1_500kb_pi_windowed)
data
Group=c("G1")
data1=data.frame(data,Group)
data=data.frame(q2_500kb_pi_windowed)
data
Group=c("G2")
data2=data.frame(data,Group)
data=data.frame(q3_500kb_pi_windowed)
data
Group=c("G3")
data3=data.frame(data,Group)
#將三組數(shù)據(jù)合并
library(dplyr)
total_data<-dplyr::bind_rows(data1,data2,data3)
#畫(huà)箱線圖+小提琴圖
p <- ggplot(total_data,aes(Group,PI,fill=Group))+geom_boxplot(width=.2)+geom_violin()
mytheme <- theme(plot.title = element_text(face = "bold.italic", size = "14", colour = "brown"), axis.title = element_text(face = "bold.italic", size = "10",color = "blue"), axis.text.x = element_text(face = "bold", size = 8, angle = 45, hjust = 1, vjust = 1), axis.text.y = element_text(face = "bold",size = 8), panel.background = element_rect(fill = "white", color = "black"), panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), legend.text = element_text(size = 8), legend.title = element_text(size = 10, face = "bold"), panel.grid.minor.x = element_blank())
p + mytheme
根據(jù)PI值畫(huà)折線圖
#輸入數(shù)據(jù)時(shí)顷锰,每列必須是數(shù)值型數(shù)據(jù),尤其是染色體那一列
data=data.frame(q1_500kb_pi_windowed)
Group=c("G1")
data1=data.frame(data,Group)
data=data.frame(q2_500kb_pi_windowed)
Group=c("G2")
data2=data.frame(data,Group)
data=data.frame(q3_500kb_pi_windowed)
Group=c("G3")
data3=data.frame(data,Group)
#將三組數(shù)據(jù)合并
library(dplyr)
library(ggplot2)
total_data<-dplyr::bind_rows(data1,data2,data3)
for (i in 1:7){
result_name = paste("chr",i,".png",sep="")
png(result_name,width=1500,height=300)
chrom = subset(total_data,CHROM==i)
xlab = paste("Chromosome",i,"(MB)",sep="")
p <- ggplot(chrom,aes(x=BIN_END/1000000,y=PI,color=Group,shape=Group)) + geom_line(size=0.5) + xlab(xlab)+ ylab("Pi") + theme_bw()
print(p)
dev.off()
}