1.使用vcftools進(jìn)行文件處理
vcftools --gzvcf merge.DP7.miss0.5.maf0.05.SNP.vcf.gz --chr 1 --chr 2 --chr 3 --chr 4 --chr 5 --chr 6 --chr 7 --chr 8 --chr 9 --chr 10 --chr 11 --chr 12 --chr 13 --chr 14 --chr 15 --chr 16 --chr 17 --chr 18 --chr 19 --chr 20 --chr 21 --chr 22 --chr 23 --chr 24 --chr 25 --chr 26 --chr 27 --chr 28 --chr 29 --chr 30 --chr 31 --chr 32 --chr 33 --recode --stdout | bgzip -c >chicken_chr1-33.vcf.gz #去掉文件中的非常染色體位置
2.使用plink進(jìn)行snp數(shù)據(jù)質(zhì)控
plink --vcf chicken_chr1-33.vcf.gz --chr-set 33 --geno 0.1 --maf 0.05 --recode vcf --out chicken_chr1-33_zk
3.使用beagle對(duì)VCF文件進(jìn)行基因型填充
beagle -Xmx20g gt=chicken_chr1-33_zk.vcf out=chicken_chr1-33_zk_tc ne=1500
4.計(jì)算ihs
R腳本
library(rehh)
library(data.table)
n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
for(i in n) {
hh <- data2haplohh(hap_file = "chicken_chr1-33_zk_tc.vcf.gz",chr.name = i,polarize_vcf = FALSE,vcf_reader = "data.table")
scan <- scan_hh(hh)
if (i == 1) {
wgscan <- scan
} else {
wgscan <- rbind(wgscan, scan)
}
}
wgscan.ihs <- ihh2ihs(wgscan,standardize=FALSE)
wgscan.ihs$ihs$UNIHS <- abs(wgscan.ihs$ihs$UNIHS)
cr.cgu <- calc_candidate_regions(wgscan.ihs,threshold = -1,window_size = 5E4,overlap = 25000,min_n_mrk = 10, min_n_extr_mrk = 1, min_perc_extr_mrk = 0,join_neighbors = FALSE,pval = TRUE) #如果設(shè)置步長(zhǎng)吧慢,也就是overlap,步長(zhǎng)必須能被窗口整除象对,滑動(dòng)窗口50000巷帝,步長(zhǎng)25000
write.table(cr.cgu,file="window.ihs.txt",sep="\t")
top1<-subset(cr.cgu,cr.cgu$MEAN_MRK> quantile(cr.cgu$MEAN_MRK, 0.95)) #取前5%為顯著區(qū)域
write.table(top1,file = "candidate_slide_ihs_0.95.txt",sep="\t")
top2<-subset(cr.cgu,cr.cgu$MEAN_MRK> quantile(cr.cgu$MEAN_MRK, 0.99)) #取前1%為顯著區(qū)域
write.table(top2,file = "candidate_slide_ihs_0.99.txt",sep="\t")
nohup Rscript ihs.R & #運(yùn)行R腳本
按窗口畫(huà)ihs曼哈頓圖
library(rehh)
ihs.cgu_eut <- read.table("window.ihs.txt",header = T,sep = ",")
quantile(ihs.cgu_eut$MEAN_MRK, 0.99)
names(ihs.cgu_eut)[3] <- "POSITION" #改第三列的列名
b<-ihs.cgu_eut[,c(2:3,6)]
manhattanplot(b,ylab = "ihs",threshold = 2.762571)
5.使用bcftools拆分不同亞群的文件
準(zhǔn)備6個(gè)亞群的文件 #在chicken/data目錄下
H1.txt 內(nèi)容如下
1_1
3_3
2_2
9_9
4_4
5_5
6_6
8_8
11_11
12_12
13_13
14_14
16_16
10_10
17_17
18_18
19_19
20_20
21_21
22_22
23_23
15_15
24_24
25_25
26_26
27_27
28_28
29_29
48_48
30_30
31_31
32_32
33_33
35_35
36_36
37_37
38_38
39_39
49_49
50_50
40_40
41_41
42_42
43_43
34_34
44_44
45_45
46_46
47_47
7_7
L1.txt 內(nèi)容如下
66_66
53_53
54_54
52_52
51_51
59_59
55_55
56_56
58_58
60_60
67_67
65_65
62_62
63_63
64_64
61_61
82_82
68_68
69_69
70_70
71_71
72_72
73_73
74_74
75_75
80_80
77_77
78_78
79_79
76_76
83_83
57_57
91_91
81_81
97_97
84_84
85_85
86_86
87_87
88_88
89_89
90_90
98_98
92_92
93_93
94_94
95_95
96_96
100_100
99_99
H2.txt
1_1
3_3
2_2
9_9
4_4
5_5
6_6
8_8
11_11
12_12
13_13
14_14
16_16
10_10
17_17
18_18
19_19
20_20
21_21
22_22
23_23
15_15
24_24
25_25
26_26
27_27
31_31
38_38
39_39
33_33
35_35
30_30
32_32
36_36
48_48
L2.txt
67_67
65_65
68_68
69_69
70_70
71_71
72_72
73_73
74_74
75_75
80_80
77_77
78_78
79_79
76_76
83_83
57_57
91_91
81_81
97_97
84_84
85_85
86_86
87_87
88_88
89_89
90_90
98_98
92_92
93_93
94_94
95_95
96_96
100_100
99_99
H3.txt
1_1
3_3
2_2
9_9
4_4
5_5
6_6
8_8
11_11
12_12
13_13
14_14
16_16
10_10
17_17
18_18
19_19
20_20
21_21
22_22
L3.txt
80_80
79_79
76_76
81_81
97_97
84_84
85_85
86_86
87_87
88_88
89_89
90_90
98_98
92_92
93_93
94_94
95_95
96_96
100_100
99_99
tabix -p vcf chicken_chr1-33_zk_tc.vcf.gz
bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S H1.txt -o H1.vcf.gz
bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S L1.txt -o L1.vcf.gz
bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S H2.txt -o H2.vcf.gz
bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S L2.txt -o L2.vcf.gz
bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S H3.txt -o H3.vcf.gz
bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S L3.txt -o L3.vcf.gz
6.使用PopLDdecay軟件進(jìn)行連鎖不平衡分析叁扫,并作LD衰減圖
git clone https://github.com/BGI-shenzhen/PopLDdecay.git #在soft目錄下使用
cd PopLDdecay
chmod 755 configure
./configure
make
mv PopLDdecay bin/
./bin/PopLDdecay -InVCF /public/jychu/chicken/data/chicken_chr1-33_zk_tc.vcf.gz -OutStat /public/jychu/chicken/data/LD/chicken_chr1-33_zk_tc_LD #在PopLDdecay目錄下使用此命令疆柔,計(jì)算LD值
./bin/PopLDdecay -InVCF /public/jychu/chicken/data/H1.vcf.gz -OutStat /public/jychu/chicken/data/LD/H1
./bin/PopLDdecay -InVCF /public/jychu/chicken/data/L1.vcf.gz -OutStat /public/jychu/chicken/data/LD/L1
./bin/PopLDdecay -InVCF /public/jychu/chicken/data/H2.vcf.gz -OutStat /public/jychu/chicken/data/LD/H2
./bin/PopLDdecay -InVCF /public/jychu/chicken/data/L2.vcf.gz -OutStat /public/jychu/chicken/data/LD/L2
./bin/PopLDdecay -InVCF /public/jychu/chicken/data/H3.vcf.gz -OutStat /public/jychu/chicken/data/LD/H3
./bin/PopLDdecay -InVCF /public/jychu/chicken/data/L3.vcf.gz -OutStat /public/jychu/chicken/data/LD/L3
perl bin/Plot_OnePop.pl -inFile /public/jychu/chicken/data/LD/chicken_chr1-33_zk_tc_LD.stat.gz -output /public/jychu/chicken/data/LD/chicken_chr1-33_zk_tc_LD #畫(huà)圖
多群體畫(huà)圖
vim H_multi.list #在LD目錄下
/public/jychu/chicken/data/LD/H1.stat.gz high_1
/public/jychu/chicken/data/LD/H2.stat.gz high_2
/public/jychu/chicken/data/LD/H3.stat.gz high_3
vim L_multi.list
/public/jychu/chicken/data/LD/L1.stat.gz low_1
/public/jychu/chicken/data/LD/L2.stat.gz low_2
/public/jychu/chicken/data/LD/L3.stat.gz low_3
在PopLDdecay目錄下
perl bin/Plot_MultiPop.pl -inList /public/jychu/chicken/data/LD/H_multi.list -output /public/jychu/chicken/data/LD/LD_high
perl bin/Plot_MultiPop.pl -inList /public/jychu/chicken/data/LD/L_multi.list -output /public/jychu/chicken/data/LD/LD_low
7.計(jì)算XPEHH值
vim H1-L1_xpehh.R
library(rehh)
library(data.table)
n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
for(i in n) {
hh1 <- data2haplohh(hap_file = "H1.vcf.gz",chr.name = i,polarize_vcf = FALSE,vcf_reader = "data.table")
scan1 <- scan_hh(hh1)
if (i == 1) {
wgscan1 <- scan1
} else {
wgscan1 <- rbind(wgscan1, scan1)
}
}
for(i in n) {
hh2 <- data2haplohh(hap_file = "L1.vcf.gz",chr.name = i,polarize_vcf = FALSE,vcf_reader = "data.table")
scan2 <- scan_hh(hh2)
if (i == 1) {
wgscan2 <- scan2
} else {
wgscan2 <- rbind(wgscan2, scan2)
}
}
xpehh.cgu_eut <- ies2xpehh(scan_pop1 = wgscan1,
scan_pop2 = wgscan2,
popname1 = "H1",
popname2 = "L1",standardize=FALSE)
cr.cgu <- calc_candidate_regions(xpehh.cgu_eut,threshold = -100,window_size = 5E4,overlap = 25000,min_n_mrk = 10, min_n_extr_mrk = 1, min_perc_extr_mrk = 0,join_neighbors = FALSE)
write.table(cr.cgu,file = "H1_L1_XPEHH_window.txt",sep ="\t")
source activate dna #因?yàn)榇薘版本是在dna環(huán)境下安裝的
nohup Rscript H1-L1_xpehh.R & #運(yùn)行R腳本
剩下兩個(gè)對(duì)比亞群的和上面代碼一樣
畫(huà)XPEHH疊加圖
1.按窗口畫(huà)
setwd("D:/迅雷下載/選擇信號(hào)分析/XPEHH")
xp3<-read.table("H1_L1_XPEHH_window.txt",header = T)
xp2<-read.table("H2_L2_XPEHH_window.txt",header = T)
xp1<-read.table("H3_L3_XPEHH_window.txt",header = T)
xp1$SNP<-seq(1,nrow(xp1),1)
xp2$SNP<-seq(1,nrow(xp2),1)
xp3$SNP<-seq(1,nrow(xp3),1)
library(doBy)
library(plyr)
n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
for (i in n){xp1$SNP[which(xp1$CHR==i)]<-xp1$SNP[which(xp1$CHR==i)]+400*(i-1)}
chr <- summaryBy(SNP~CHR, xp1, FUN = median)
for (i in n){xp2$SNP[which(xp2$CHR==i)]<-xp2$SNP[which(xp2$CHR==i)]+400*(i-1)}
chr <- summaryBy(SNP~CHR, xp2, FUN = median)
for (i in n){xp3$SNP[which(xp3$CHR==i)]<-xp3$SNP[which(xp3$CHR==i)]+400*(i-1)}
chr <- summaryBy(SNP~CHR, xp3, FUN = median)
xp<-cbind(xp1,xp2[,4],xp3[,4])
xp<-na.omit(xp)
top1<-subset(xp,xp$MEAN_MRK> quantile(xp$MEAN_MRK, 0.995)&xp$`xp2[, 4]`> quantile(xp$`xp2[, 4]`, 0.995)&xp$`xp3[, 4]`> quantile(xp$`xp3[, 4]`, 0.995)&xp$MEAN_MRK>xp$`xp2[, 4]`&xp$`xp2[, 4]`>xp$`xp3[, 4]`)
top2<-subset(xp,xp$MEAN_MRK< quantile(xp$MEAN_MRK, 0.005)&xp$`xp2[, 4]`< quantile(xp$`xp2[, 4]`, 0.005)&xp$`xp3[, 4]`< quantile(xp$`xp3[, 4]`, 0.005)&xp$MEAN_MRK<xp$`xp2[, 4]`&xp$`xp2[, 4]`<xp$`xp3[, 4]`)
csv1<-top1[,c(1:4,6:7)]
write.csv(csv1,"top1.csv",row.names = FALSE)
csv2<-top2[,c(1:4,6:7)]
write.csv(csv2,"top2.csv",row.names = FALSE)
library(ggplot2)
p2<-ggplot()+
geom_point(aes(SNP,xp1$MEAN_MRK), data = xp1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,xp2$MEAN_MRK), data = xp2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,xp3$MEAN_MRK), data = xp3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,top1$MEAN_MRK,color="High"), data = top1,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top1$`xp2[, 4]`,color="Mid"), data = top1,pch=19, alpha=1, size=1.4) +
geom_point(aes(SNP,top1$`xp3[, 4]`,color="Low"), data = top1,pch=19, alpha=1, size=1.2) +
geom_point(aes(SNP,top2$MEAN_MRK,color="High"), data = top2,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top2$`xp2[, 4]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
geom_point(aes(SNP,top2$`xp3[, 4]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
theme_bw(base_size = 16)+
theme(panel.grid = element_blank(),
axis.line = element_line(colour = 'black'),
panel.background = element_rect(fill = "transparent"),
axis.title.y = element_text(face = "bold"),
legend.position = "right",
legend.text = element_text(face = "bold"),
legend.background = element_rect(colour = '#DCDCDC'),
legend.title = element_text(face = "bold"))+
labs(x = 'Chromosome', y = expression("Xpehh")) +
scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.05,0)))+
scale_y_continuous(expand = c(0.01,0),breaks =seq(-6,6,1.5),limits=c(-8,8))+
scale_color_manual(name="Group",values = c("High"="red","Mid"="#8ec965","Low"="black"),limit=c("High","Mid","Low"))+
geom_hline(aes(yintercept=0),col="black",lty=2,lwd=0.1)+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
ggsave("xpehh.png")
2.按位點(diǎn)畫(huà)
setwd("D:/迅雷下載/選擇信號(hào)分析final/XPEHH")
xp3<-read.table("H1_L1_XPEHH.txt",header = T)
xp2<-read.table("H2_L2_XPEHH.txt",header = T)
xp1<-read.table("H3_L3_XPEHH.txt",header = T)
xp1$SNP<-seq(1,nrow(xp1),1)
xp2$SNP<-seq(1,nrow(xp2),1)
xp3$SNP<-seq(1,nrow(xp3),1)
library(doBy)
library(plyr)
n<- c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
for (i in n){xp1$SNP[which(xp1$CHR==i)]<-xp1$SNP[which(xp1$CHR==i)]+28000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHR, xp1, FUN = median)
for (i in n){xp2$SNP[which(xp2$CHR==i)]<-xp2$SNP[which(xp2$CHR==i)]+28000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHR, xp2, FUN = median)
for (i in n){xp3$SNP[which(xp3$CHR==i)]<-xp3$SNP[which(xp3$CHR==i)]+28000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHR, xp3, FUN = median)
xp<-cbind(xp1,xp2[,3],xp3[,3])
xp<-na.omit(xp)
top1<-subset(xp,xp$UNXPEHH_H3_L3> quantile(xp$UNXPEHH_H3_L3, 0.995)&xp$`xp2[, 3]`> quantile(xp$`xp2[, 3]`, 0.995)&xp$`xp3[, 3]`> quantile(xp$`xp3[, 3]`, 0.995)&xp$UNXPEHH_H3_L3>xp$`xp2[, 3]`&xp$`xp2[, 3]`>xp$`xp3[, 3]`)
top2<-subset(xp,xp$UNXPEHH_H3_L3< quantile(xp$UNXPEHH_H3_L3, 0.005)&xp$`xp2[, 3]`< quantile(xp$`xp2[, 3]`, 0.005)&xp$`xp3[, 3]`< quantile(xp$`xp3[, 3]`, 0.005)&xp$UNXPEHH_H3_L3<xp$`xp2[, 3]`&xp$`xp2[, 3]`<xp$`xp3[, 3]`)
csv1<-top1[,c(1:3,5:6)]
write.csv(csv1,"top0.01.csv",row.names = FALSE)
csv2<-top2[,c(1:3,5:6)]
write.csv(csv2,"low0.01.csv",row.names = FALSE)
library(ggplot2)
p2<-ggplot()+
geom_point(aes(SNP,xp1$UNXPEHH_H3_L3), data = xp1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,xp2$UNXPEHH_H2_L2), data = xp2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,xp3$UNXPEHH_H1_L1), data = xp3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,top1$UNXPEHH_H3_L3,color="High"), data = top1,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top1$`xp2[, 3]`,color="Mid"), data = top1,pch=19, alpha=1, size=1.4) +
geom_point(aes(SNP,top1$`xp3[, 3]`,color="Low"), data = top1,pch=19, alpha=1, size=1.2) +
geom_point(aes(SNP,top2$UNXPEHH_H3_L3,color="High"), data = top2,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top2$`xp2[, 3]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
geom_point(aes(SNP,top2$`xp3[, 3]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
theme_bw(base_size = 16)+
theme(panel.grid = element_blank(),
axis.line = element_line(colour = 'black'),
panel.background = element_rect(fill = "transparent"),
axis.title.y = element_text(face = "bold"),
legend.position = "right",
legend.text = element_text(face = "bold"),
legend.background = element_rect(colour = '#DCDCDC'),
legend.title = element_text(face = "bold"))+
labs(x = 'Chromosome', y = expression("Xpehh")) +
scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.05,0)))+
scale_y_continuous(expand = c(0.01,0),breaks =seq(-5,5,1),limits=c(-6,6))+
scale_color_manual(name="Group",values = c("High"="red","Mid"="#22ff96","Low"="black"),limit=c("High","Mid","Low"))+
geom_hline(aes(yintercept=0),col="black",lty=2,lwd=0.1)+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
ggsave("xpehh.tiff",width = 12,height = 3.5)
8.使用vcftools計(jì)算FST值
準(zhǔn)備上面六個(gè)亞群的名稱(chēng)文件
個(gè)體名稱(chēng)要和總?cè)后wvcf格式里面的名稱(chēng)一致
vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H1.txt --weir-fst-pop L1.txt --out fst_H1vsL1 --fst-window-size 50000 --fst-window-step 25000
vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H2.txt --weir-fst-pop L2.txt --out fst_H2vsL2 --fst-window-size 50000 --fst-window-step 25000
vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H3.txt --weir-fst-pop L3.txt --out fst_H3vsL3 --fst-window-size 50000 --fst-window-step 25000
cat fst_H1vsL1.windowed.weir.fst |awk '{if($5<0)print $1"\t"$2"\t"$3"\t0";else print $1"\t"$2"\t"$3"\t"$6}' > fst_H1-L1.txt
cat fst_H2vsL2.windowed.weir.fst |awk '{if($5<0)print $1"\t"$2"\t"$3"\t0";else print $1"\t"$2"\t"$3"\t"$6}' > fst_H2-L2.txt
cat fst_H3vsL3.windowed.weir.fst |awk '{if($5<0)print $1"\t"$2"\t"$3"\t0";else print $1"\t"$2"\t"$3"\t"$6}' > fst_H3-L3.txt
vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H1.txt --weir-fst-pop L1.txt --out fst_H1vsL1
vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H2.txt --weir-fst-pop L2.txt --out fst_H2vsL2
vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H3.txt --weir-fst-pop L3.txt --out fst_H3vsL3
cat fst_H1vsL1.weir.fst |awk '{if($3<0)print $1"\t"$2"\t0";else print $1"\t"$2"\t"$3}' > fst_H1-L1.txt
cat fst_H2vsL2.weir.fst |awk '{if($3<0)print $1"\t"$2"\t0";else print $1"\t"$2"\t"$3}' > fst_H2-L2.txt
cat fst_H3vsL3.weir.fst |awk '{if($3<0)print $1"\t"$2"\t0";else print $1"\t"$2"\t"$3}' > fst_H3-L3.txt
R語(yǔ)言繪制Fst疊加圖
setwd("D:/迅雷下載/選擇信號(hào)分析/FST")
fst3<-read.table("fst_H1-L1.txt",header = T)
fst2<-read.table("fst_H2-L2.txt",header = T)
fst1<-read.table("fst_H3-L3.txt",header = T)
fst1$SNP<-seq(1,nrow(fst1),1)
fst2$SNP<-seq(1,nrow(fst2),1)
fst3$SNP<-seq(1,nrow(fst3),1)
library(doBy)
library(plyr)
n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
for (i in n){fst1$SNP[which(fst1$CHROM==i)]<-fst1$SNP[which(fst1$CHROM==i)]+400*(i-1)}
chr <- summaryBy(SNP~CHROM, fst1, FUN = median)
for (i in n){fst2$SNP[which(fst2$CHROM==i)]<-fst2$SNP[which(fst2$CHROM==i)]+400*(i-1)}
chr <- summaryBy(SNP~CHROM, fst2, FUN = median)
for (i in n){fst3$SNP[which(fst3$CHROM==i)]<-fst3$SNP[which(fst3$CHROM==i)]+400*(i-1)}
chr <- summaryBy(SNP~CHROM, fst3, FUN = median)
fst<-cbind(fst1,fst2[,4],fst3[,4])
fst<-na.omit(fst)
top2<-subset(fst,fst$WEIGHTED_FST> quantile(fst$WEIGHTED_FST, 0.99)&fst$`fst2[, 4]`> quantile(fst$`fst2[, 4]`, 0.99)&fst$`fst3[, 4]`> quantile(fst$`fst3[, 4]`, 0.99)&fst$WEIGHTED_FST>fst$`fst2[, 4]`&fst$`fst2[, 4]`>fst$`fst3[, 4]`)
csv2<-top2[,c(1:4,6:7)]
write.csv(csv2,"top2.txt",row.names = FALSE)
library(ggplot2)
p2<-ggplot()+
geom_point(aes(SNP,fst1$WEIGHTED_FST), data = fst1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,fst2$WEIGHTED_FST), data = fst2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,fst3$WEIGHTED_FST), data = fst3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,top2$WEIGHTED_FST,color="High"), data = top2,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top2$`fst2[, 4]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
geom_point(aes(SNP,top2$`fst3[, 4]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
theme_bw(base_size = 16)+
theme(panel.grid = element_blank(),
axis.line = element_line(colour = 'black'),
panel.background = element_rect(fill = "transparent"),
axis.title.y = element_text(face = "bold"),
legend.position = "right",
legend.text = element_text(face = "bold"),
legend.background = element_rect(colour = '#DCDCDC'),
legend.title = element_text(face = "bold"))+
labs(x = 'Chromosome', y = expression("Fst")) +
scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHROM,expand=(c(0.05,0)))+
scale_y_continuous(expand = c(0.01,0),breaks =seq(-0.05,0.25,0.05),limits=c(0,0.25))+
scale_color_manual(name="Group",values = c("High"="red","Mid"="#8ec965","Low"="black"),limit=c("High","Mid","Low"))+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
ggsave("fst.png")
按位點(diǎn)畫(huà)圖
setwd("D:/迅雷下載/選擇信號(hào)分析final/FST")
fst3<-read.table("fst_H1-L1.txt",header = T)
fst2<-read.table("fst_H2-L2.txt",header = T)
fst1<-read.table("fst_H3-L3.txt",header = T)
fst1$SNP<-seq(1,nrow(fst1),1)
fst2$SNP<-seq(1,nrow(fst2),1)
fst3$SNP<-seq(1,nrow(fst3),1)
library(doBy)
library(plyr)
n<- c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
for (i in n){fst1$SNP[which(fst1$CHROM==i)]<-fst1$SNP[which(fst1$CHROM==i)]+28000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHROM, fst1, FUN = median)
for (i in n){fst2$SNP[which(fst2$CHROM==i)]<-fst2$SNP[which(fst2$CHROM==i)]+28000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHROM, fst2, FUN = median)
for (i in n){fst3$SNP[which(fst3$CHROM==i)]<-fst3$SNP[which(fst3$CHROM==i)]+28000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHROM, fst3, FUN = median)
fst<-cbind(fst1,fst2[,3],fst3[,3])
fst<-na.omit(fst)
top2<-subset(fst,fst$WEIR_AND_COCKERHAM_FST> quantile(fst$WEIR_AND_COCKERHAM_FST, 0.99)&fst$`fst2[, 3]`> quantile(fst$`fst2[, 3]`, 0.99)&fst$`fst3[, 3]`> quantile(fst$`fst3[, 3]`, 0.99)&fst$WEIR_AND_COCKERHAM_FST>fst$`fst2[, 3]`&fst$`fst2[, 3]`>fst$`fst3[, 3]`)
csv2<-top2[,c(1:3,5:6)]
write.csv(csv2,"houxun_fst.csv",row.names = FALSE)
library(ggplot2)
p2<-ggplot()+
geom_point(aes(SNP,fst1$WEIR_AND_COCKERHAM_FST), data = fst1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,fst2$WEIR_AND_COCKERHAM_FST), data = fst2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,fst3$WEIR_AND_COCKERHAM_FST), data = fst3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) +
geom_point(aes(SNP,top2$WEIR_AND_COCKERHAM_FST,color="High"), data = top2,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top2$`fst2[, 3]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.45) +
geom_point(aes(SNP,top2$`fst3[, 3]`,color="Low"), data = top2,pch=19, alpha=1, size=1.3) +theme_bw(base_size = 16)+
theme(panel.grid = element_blank(),
axis.line = element_line(colour = 'black'),
panel.background = element_rect(fill = "transparent"),
axis.title.y = element_text(face = "bold"),
legend.position = "right",
legend.text = element_text(face = "bold"),
legend.background = element_rect(colour = '#DCDCDC'),
legend.title = element_text(face = "bold"))+
labs(x = 'Chromosome', y = expression("Fst")) +
scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHROM,expand=(c(0.05,0)))+
scale_y_continuous(expand = c(0.01,0),breaks =seq(-0.05,0.30,0.05),limits=c(0,0.30))+
scale_color_manual(name="Group",values = c("High"="red","Mid"="#22ff96","Low"="black"),limit=c("High","Mid","Low"))+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
ggsave("fst4.tiff",width = 12,height = 3.5)
9.使用SnpEff 對(duì)SNP結(jié)果進(jìn)行注釋
wget http://sourceforge.net/projects/snpeff/files/snpEff_latest_core.zip #在家目錄下, 下載安裝包
unzip snpEff_latest_core.zip #進(jìn)行解壓绰咽,會(huì)產(chǎn)生一個(gè)snpEff目錄 所有的程序都在這里面
修改 snpEff 目錄下的注釋文件 snpEff.config,在“Third party databases”行下加入如下內(nèi)容:
# chicken genome, version gall6
gall6.genome : chicken
在 snpEff 目錄下段多,創(chuàng)建目錄 data, data/gall6, data/genomes
將 gtf 文件放入gall6目錄下,并改名為 genes.gtf壮吩;將基因組序列文件放入 genomes 目錄下进苍,并改名為 gall6.fa
wget http://ftp.ensembl.org/pub/release-101/gtf/gallus_gallus/Gallus_gallus.GRCg6a.101.gtf.gz
mv Gallus_gallus.GRCg6a.101.gtf genes.gtf
wget http://ftp.ensembl.org/pub/release-101/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz
mv Gallus_gallus.GRCg6a.dna.toplevel.fa gall6.fa
GWAS群體的基因型信息(vcf文件):chicken_chr1-33_zk_tc.vcf 放入snpEff/data目錄下。
數(shù)據(jù)庫(kù)建立:在 snpEff 目錄下鸭叙,執(zhí)行命令:
java -jar snpEff.jar build -gtf22 -v gall6 #如果成功那么在gall6目錄下會(huì)有一個(gè)".bin"文件產(chǎn)生
對(duì)vcf格式文件進(jìn)行注釋
在data目錄下命令如下:
java -Xmx16g -jar ~/snpEff/snpEff.jar eff -v gall6 -c ../snpEff/snpEff.config -i vcf chicken_chr1-33_zk_tc.vcf >chicken_chr1-33_zk_tc_zs.vcf
10.對(duì)候選區(qū)域進(jìn)行注釋
vim FST
1 44375001 44425000
1 88050001 88100000 #以TAB鍵分割
vim h_xpehhhouxun_fst.txt
vim l_xpehh
bgzip chicken_chr1-33_zk_tc_zs.vcf
tabix -p vcf chicken_chr1-33_zk_tc_zs.vcf.gz
bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -R FST > FST_HX.vcf
bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -R h_xpehh > h_xpehh_HX.vcf
bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -R l_xpehh > l_xpehh_HX.vcf
11.提取不同位點(diǎn)的注釋
vim houxun_fst.txt
1 44375001
1 88050001
bgzip chicken_chr1-33_zk_tc_zs.vcf
tabix -p vcf chicken_chr1-33_zk_tc_zs.vcf.gz
bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -T houxun_fst.txt > houxun_fst.vcf
bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -T hx_xpehh_high.txt > hx_xpehh_high.vcf
bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -T hx_xpehh_low.txt > hx_xpehh_low.vcf
12.SNP注釋
wget http://ftp.ensembl.org/pub/release-101/variation/vcf/gallus_gallus/gallus_gallus.vcf.gz
tabix -p vcf gallus_gallus.vcf.gz
java -jar ~/snpEff/SnpSift.jar annotate ~/chicken/finaldate0.2/00-All.vcf.gz ~/chicken/finaldate0.2/chicken_chr1-33_zk_tc_zs.vcf >chicken_chr1-33_zk_tc_zs_rs.vcf
11.計(jì)算次要等位基因頻率
plink --vcf chicken_chr1-33_zk_tc.vcf.gz --chr-set 33 --freq --out chicken_chr1-33_zk_tc
12.提取紅色原雞的個(gè)體文件
vcftools --gzvcf chicken_chr1-33.vcf.gz --keep Red_junglefowl.txt --recode --stdout | bgzip -c >Red_junglefowl2.vcf.gz
GWAS
1.給填充過(guò)的VCF文件加注釋
cat chicken_chr1-33_zk_tc.vcf | head -n 10 > header
cat chicken_chr1-33_zk_tc.vcf | grep "^#" -v | awk '{$3=$1"_"$2;print $0}' | sed 's/ /\t/g' > body
cat header body > chicken_chr1-33_zk_tcID.vcf
2.將vcf轉(zhuǎn)成ped
plink --vcf chicken_chr1-33_zk_tcID.vcf --chr-set 33 --recode --out chicken_chr1-33_zk_tcID
3.基于連鎖不平衡過(guò)濾SNP:我感覺(jué)可以不用觉啊,因?yàn)槲乙抑丿B位點(diǎn)
plink --file chicken_chr1-33_zk_tcID --chr-set 33 --indep 10 5 3 #10代表窗口大小,5代表步長(zhǎng)沈贝,3代表1/3杠人,即窗口內(nèi)每對(duì)snpLD值大于1/3就會(huì)刪除一對(duì)SNP中的一個(gè)
plink --file chicken_chr1-33_zk_tcID --extract plink.prune.in --chr-set 33 --recode --out chicken_chr1-33_zk_tcID_LDfilter
4.將ped/map轉(zhuǎn)換為fam/bed/bim
plink --file chicken_chr1-33_zk_tcID --chr-set 33 --make-bed --out chicken_chr1-33_zk_tcID
5.檢驗(yàn)表型數(shù)據(jù)是否服從正態(tài)分布
x<-read.table(file = "100個(gè)體正太性.txt",header=T)
shapiro.test(x)
6.把表型數(shù)據(jù)轉(zhuǎn)換為正態(tài)分布
用SPSS轉(zhuǎn)換為正態(tài)性數(shù)據(jù):秩分的正態(tài)得分的轉(zhuǎn)化
6.把.fam文件第六列的-9(默認(rèn)值)全部替換為表型值
awk '{print $1" "$2" "$3" "$4" "$5}' chicken_chr1-33_zk_tcID.fam >col5
vim col6
paste col5 col6 >chicken_chr1-33_zk_tcID.fam
awk '{print $1" "$2" "$3" "$4" "$5" "$6}' chicken_chr1-33_zk_tcID.fam >TEST
mv TEST chicken_chr1-33_zk_tcID.fam
7.使用gemma進(jìn)行運(yùn)行
gemma -bfile chicken_chr1-33_zk_tcID -gk 2 -o kin #生成K(親緣關(guān)系)矩陣
gemma -bfile chicken_chr1-33_zk_tcID -k kin.sXX.txt -lmm 1 -o GE_GWAS #-lmm 1 使用wald檢驗(yàn)計(jì)算顯著性
8.做pca,把前5個(gè)pca當(dāng)作協(xié)變量
plink --bfile chicken_chr1-33_zk_tcID --chr-set 33 --pca 5 --out chicken_chr1-33_zk_tcID_pca #myfile_pca.eigenvec即為我們所需的PCA文件也為協(xié)變量的輸入文件
9.看PCA圖是否把群體分群
data<-read.table("PCA.txt",header = T,sep = "\t")
ggplot(data,aes(x=PCA1,y=PCA2,colour=group))+ geom_point()
ggsave("pca.tiff")
10.加入?yún)f(xié)變量計(jì)算GWAS
同8.使用simpleM腳本計(jì)算Meff值,該值作為Bonferroni矯正的分母
首先需要準(zhǔn)備編碼的SNP文件宋下,自己編寫(xiě)腳本進(jìn)行轉(zhuǎn)換
cat chicken_chr1-33_zk_tc.vcf | grep "^#" -v | awk '{$3=$1"_"$2;print $0}' | sed 's/ /\t/g' > body
awk '{for(i=10;i<NF;i++)printf("%s ",$i);print $NF}' body >bodyGT
sed -i "s/0|0/0/g" bodyGT
sed -i "s/0|1/1/g" bodyGT
sed -i "s/1|0/1/g" bodyGT
sed -i "s/1|1/2/g" bodyGT
mv bodyGT snpSample.txt
Rscript simpleM_Ex.R
出錯(cuò):需要自己刪掉全是1的行
這里是刪掉全是0的行和列的代碼
awk '{show=0; for (i=1; i<=NF; i++) {if ($i!=0) show=1; col[i]+=$i;}} show==1{tr++; for (i=1; i<=NF; i++) vals[tr,i]=$i; tc=NF} END{for(i=1; i<=tr; i++) { for (j=1; j<=tc; j++) { if (col[j]>0) printf("%s%s", vals[i,j], OFS)} print ""; } }' snpSample.txt >snpSample0.txt
因?yàn)闆](méi)有理解代碼嗡善,我的思路是先把所有的0替換成3,再把所有的1替換為0学歧,這樣可以把全為1的行去掉罩引,然后把0替換為1,把3替換為0枝笨,就可以完成替換的操作
sed -i "s/0/3/g" snpSample.txt
sed -i "s/1/0/g" snpSample.txt
awk '{show=0; for (i=1; i<=NF; i++) {if ($i!=0) show=1; col[i]+=$i;}} show==1{tr++; for (i=1; i<=NF; i++) vals[tr,i]=$i; tc=NF} END{for(i=1; i<=tr; i++) { for (j=1; j<=tc; j++) { if (col[j]>0) printf("%s%s", vals[i,j], OFS)} print ""; } }' snpSample.txt >snpSample1.txt #大概8分鐘
wc -l snpSample1.txt
sed -i "s/0/1/g" snpSample1.txt
sed -i "s/3/0/g" snpSample1.txt
Rscript simpleM_Ex.R
計(jì)算等位基因頻率差
vcftools --gzvcf H1.vcf.gz --freq2 --out H1
sed -i '1d' H1.frq
vim header
chr pos allele N_chr ref_frq ALT_frq
cat header H1.frq> H1.freq
setwd("D:/迅雷下載/選擇信號(hào)分析final0.2/Freq/")
h1<-read.table(file="H1.freq",header = T)
h2<-read.table(file="H2.freq",header = T)
h3<-read.table(file="H3.freq",header = T)
L1<-read.table(file="L1.freq",header = T)
L2<-read.table(file="L2.freq",header = T)
L3<-read.table(file="L3.freq",header = T)
H1_L1 <- data.frame(CHR = h1$chr, POS = h1$pos,F = h1$ref_frq - L1$ref_frq )
H2_L2 <- data.frame(CHR = h2$chr, POS = h2$pos,F = h2$ref_frq - L2$ref_frq )
H3_L3 <- data.frame(CHR = h3$chr, POS = h3$pos,F = h3$ref_frq - L3$ref_frq )
write.csv(H1_L1,"F_H1-L1.csv",row.names = FALSE)
write.csv(H2_L2,"F_H2-L2.csv",row.names = FALSE)
write.csv(H3_L3,"F_H3-L3.csv",row.names = FALSE)
F3<-H1_L1
F2<-H2_L2
F1<-H3_L3
F1$SNP<-seq(1,nrow(F1),1)
F2$SNP<-seq(1,nrow(F2),1)
F3$SNP<-seq(1,nrow(F3),1)
library(doBy)
library(plyr)
n<- c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
for (i in n){F1$SNP[which(F1$CHR==i)]<-F1$SNP[which(F1$CHR==i)]+38000*(i-1)-233*(i-2)*(i-1)} #28500
chr <- summaryBy(SNP~CHR, F1, FUN = median)
for (i in n){F2$SNP[which(F2$CHR==i)]<-F2$SNP[which(F2$CHR==i)]+38000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHR, F2, FUN = median)
for (i in n){F3$SNP[which(F3$CHR==i)]<-F3$SNP[which(F3$CHR==i)]+38000*(i-1)-233*(i-2)*(i-1)}
chr <- summaryBy(SNP~CHR, F3, FUN = median)
f<-cbind(F1,F2[,3],F3[,3])
f<-na.omit(f)
tdzheng<-subset(f,f$F> f$`F2[, 3]`&f$`F2[, 3]`> f$`F3[, 3]`)
tdfu<-subset(f,f$F< f$`F2[, 3]`&f$`F2[, 3]`<f$`F3[, 3]`)
top1<-subset(tdzheng,tdzheng$F>quantile(tdzheng$F, 0.995)&tdzheng$`F3[, 3]`>0)
csv1<-top1[,c(1:3,5:6)]
write.csv(csv1,"top0.005.csv",row.names = FALSE)
top2<-subset(tdfu,tdfu$F<quantile(tdfu$F, 0.005)&tdfu$`F3[, 3]`<0)
csv2<-top2[,c(1:3,5:6)]
write.csv(csv2,"low0.005.csv",row.names = FALSE)
library(ggplot2)
p2<-ggplot()+
geom_point(aes(SNP,F1$F), data = F1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=1) +
geom_point(aes(SNP,F2$F), data = F2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=1) +
geom_point(aes(SNP,F3$F), data = F3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=1) +
geom_point(aes(SNP,top1$F,color="High"), data = top1,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top1$`F2[, 3]`,color="Mid"), data = top1,pch=19, alpha=1, size=1.4) +
geom_point(aes(SNP,top1$`F3[, 3]`,color="Low"), data = top1,pch=19, alpha=1, size=1.2) +
geom_point(aes(SNP,top2$F,color="High"), data = top2,pch=19, alpha=1, size=1.6) +
geom_point(aes(SNP,top2$`F2[, 3]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
geom_point(aes(SNP,top2$`F3[, 3]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
theme_bw(base_size = 16)+
theme(panel.grid = element_blank(),
axis.line = element_line(colour = 'black'),
panel.background = element_rect(fill = "transparent"),
axis.title.y = element_text(face = "bold"),
legend.position = "right",
legend.text = element_text(face = "bold"),
legend.background = element_rect(colour = '#DCDCDC'),
legend.title = element_text(face = "bold"))+
labs(x = 'Chromosome', y = expression("AFD")) +
scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.05,0)))+
scale_y_continuous(expand = c(0.1,0),breaks =seq(-0.5,0.5,0.2),limits=c(-0.5,0.5))+
scale_color_manual(name="Group",values = c("High"="red","Mid"="#22ff96","Low"="black"),limit=c("High","Mid","Low"))+
geom_hline(aes(yintercept=0),col="black",lty=2,lwd=0.5)+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
ggsave("freq5.tiff",width = 12,height = 2.5)
畫(huà)LD block圖袁铐,輸入文件的數(shù)目必須和要畫(huà)的snp數(shù)目一致
cd ~/soft/haploview
wget -c https://www.broadinstitute.org/ftp/pub/mpg/haploview/Haploview.jar
java -jar Haploview.jar --h
準(zhǔn)備兩個(gè)輸入文件
一個(gè)為chicken_chr27.ped 一個(gè)為chicken_chr1-33_zk_tcID.info文件
chicken_chr27.ped是用plink的輸出文件
先提取27號(hào)染色體的基因型文件
vim chr27
27 4789482 4989482
bgzip chicken_chr1-33_zk_tcID.vcf
tabix -p vcf chicken_chr1-33_zk_tcID.vcf.gz
bcftools filter chicken_chr1-33_zk_tcID.vcf.gz -R chr27 > chr27.vcf
plink --vcf chr27.vcf --chr-set 27 --recode --out chicken_chr27
chicken_chr1-33_zk_tcID.info文件需要自己設(shè)置,第一列為snpID,第二列為位置伺帘,可以加第三列昭躺,有第三列的SNP會(huì)被綠色高亮忌锯,tab鍵分割
我需要的是27號(hào)染色體 4889482上下游100kb內(nèi)的所有snp
awk '{if($1=="27")print $0}' chicken_chr1-33_zk_tcID.map >test.info
awk '{if($4<="4989482")print $0}' test.info >test1.info
awk '{if($4>="4789482")print $2"\t"$4}' test1.info >chicken_chr27.info
vim chicken_chr27.info #在顯著的snp后面加上標(biāo)簽
cp chicken_chr27.info chicken_chr27.ped LD_BLOCK
把ped文件里的-9全部換成0
sed -i "s/-9/0/g" chicken_chr27.ped
cd ~/chicken/final0.2/GWAS/LD_BLOCK
java -Xmx16g -jar ~/soft/haploview/Haploview.jar -n -q -log chicken_chr27.log -pedfile chicken_chr27.ped -info chicken_chr27.info -blockoutput GAB -png -check -mendel -dprime -compressedpng -includeTagsFile
cat Gallus_gallus.GRCg6a.101.gtf | grep "^#" -v | awk '{$2=null;$3=null;$6=null;$7=null;$8=null;print $0}' >Gallus_gallus.GRCg6a.101.bed #去掉文件中的某幾列
sed '/^$/d' test.txt|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}' #求文件的最大值
可變剪切
conda activate rmats
echo "H1_sort.bam H2_sort.bam H4_sort.bam" | tr ' ' '\n' > UVJ_H.txt
echo "L1_sort.bam L2_sort.bam L3_sort.bam" | tr ' ' '\n' > UVJ_L.txt
RNASeq-MATS.py --b1 UVJ_H.txt --b2 UVJ_L.txt --gtf ~/reference/annotate/chicken/Gallus_gallus.GRCg6a.101.gtf --od /public/jychu/YangGe/cleandata/bam/rMATS/UVJ_H_vs_L_rMATS --tmp /public/jychu/YangGe/cleandata/bam/rMATS/UVJ_H_vs_L_rMATS -t paired --readLength 151 --cstat 0.0001 --nthread 6
雞的選擇信號(hào)分析代碼
最后編輯于 :
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
- 文/潘曉璐 我一進(jìn)店門(mén)龙助,熙熙樓的掌柜王于貴愁眉苦臉地迎上來(lái)砰奕,“玉大人,你說(shuō)我怎么就攤上這事提鸟【” “怎么了?”我有些...
- 文/不壞的土叔 我叫張陵称勋,是天一觀的道長(zhǎng)胸哥。 經(jīng)常有香客問(wèn)我,道長(zhǎng)赡鲜,這世上最難降的妖魔是什么空厌? 我笑而不...
- 正文 為了忘掉前任,我火速辦了婚禮银酬,結(jié)果婚禮上嘲更,老公的妹妹穿的比我還像新娘。我一直安慰自己揩瞪,他們只是感情好哮内,可當(dāng)我...
- 文/花漫 我一把揭開(kāi)白布。 她就那樣靜靜地躺著壮韭,像睡著了一般北发。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上喷屋,一...
- 那天琳拨,我揣著相機(jī)與錄音,去河邊找鬼屯曹。 笑死狱庇,一個(gè)胖子當(dāng)著我的面吹牛,可吹牛的內(nèi)容都是我干的恶耽。 我是一名探鬼主播密任,決...
- 文/蒼蘭香墨 我猛地睜開(kāi)眼,長(zhǎng)吁一口氣:“原來(lái)是場(chǎng)噩夢(mèng)啊……” “哼偷俭!你這毒婦竟也來(lái)了浪讳?” 一聲冷哼從身側(cè)響起,我...
- 序言:老撾萬(wàn)榮一對(duì)情侶失蹤涌萤,失蹤者是張志新(化名)和其女友劉穎淹遵,沒(méi)想到半個(gè)月后口猜,有當(dāng)?shù)厝嗽跇?shù)林里發(fā)現(xiàn)了一具尸體,經(jīng)...
- 正文 獨(dú)居荒郊野嶺守林人離奇死亡透揣,尸身上長(zhǎng)有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
- 正文 我和宋清朗相戀三年济炎,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片辐真。...
- 正文 年R本政府宣布放坏,位于F島的核電站咙咽,受9級(jí)特大地震影響,放射性物質(zhì)發(fā)生泄漏淤年。R本人自食惡果不足惜钧敞,卻給世界環(huán)境...
- 文/蒙蒙 一、第九天 我趴在偏房一處隱蔽的房頂上張望麸粮。 院中可真熱鬧溉苛,春花似錦、人聲如沸弄诲。這莊子的主人今日做“春日...
- 文/蒼蘭香墨 我抬頭看了看天上的太陽(yáng)齐遵。三九已至寂玲,卻和暖如春,著一層夾襖步出監(jiān)牢的瞬間梗摇,已是汗流浹背拓哟。 一陣腳步聲響...
- 正文 我出身青樓,卻偏偏與公主長(zhǎng)得像糜烹,于是被迫代替她去往敵國(guó)和親违诗。 傳聞我的和親對(duì)象是個(gè)殘疾皇子,可洞房花燭夜當(dāng)晚...
推薦閱讀更多精彩內(nèi)容
- 本文對(duì)比iOS常用的幾種靜態(tài)分析工具疮蹦,分析優(yōu)缺點(diǎn)诸迟,并從中選出適合當(dāng)前工程的工具,本文的應(yīng)用系統(tǒng)為macOS。 cl...
- 針對(duì)企業(yè)信息系統(tǒng)源代碼中可能存在的安全漏洞,可以運(yùn)用基于歷史漏洞信息的智能學(xué)習(xí)技術(shù)進(jìn)行檢測(cè)笛粘。通過(guò)采集海量開(kāi)源代碼并...
- 如今的Java似乎越來(lái)越火熱了垛膝,不過(guò)想想也是鳍侣,畢竟一個(gè)跨平臺(tái)性讓得它在編程語(yǔ)言界稱(chēng)王稱(chēng)霸了多年,連它的前輩C語(yǔ)言都...
- DTMF(Dual Tone Multi Frequency) 雙音多頻惑折,由高頻群和低頻群組成,高低頻群各包含4個(gè)...
- 代碼的第一部分與項(xiàng)目(LED閃燈器--硬件回顧)完全一樣枯跑,這次語(yǔ)句包含三個(gè)獨(dú)立的代碼段惨驶。 第一個(gè)代碼段是輸出三個(gè)點(diǎn)...