參考:
以水稻為例教你如何使用BSA方法進行遺傳定位(下篇) - 簡書 (jianshu.com)
使用QTLseqr進行BSA-seq分析 - 簡書 (jianshu.com)
- 加上擬合線
文獻中有擬合線(黑色的那個),如何按照每1Mb為窗口、每次移動10kb計算均值?
以KY0DN1為例
#' Sliding-window mean of a per-site statistic along one chromosome
#'
#' Windows start at 0 and advance by `step_size`; each window spans
#' `window_size` and is half-open, `[start, start + window_size)`, so a site
#' lying exactly on a window boundary is still counted.
#'
#' @param pos Numeric vector of site positions.
#' @param value Numeric vector of the statistic, parallel to `pos`.
#' @param window_size Width of each window (same unit as `pos`).
#' @param step_size Distance between consecutive window starts.
#' @return A data.frame with columns `pos` (window midpoint) and `value`
#'   (mean of the non-missing values inside the window). Windows that
#'   contain no usable value are omitted.
calcValueByWindow <- function(pos, value,
                              window_size = 1000000,
                              step_size = 100000) {
  stopifnot(length(pos) == length(value), window_size > 0, step_size > 0)

  # Sites without a position cannot be assigned to any window.
  usable <- !is.na(pos)
  pos <- pos[usable]
  value <- value[usable]
  if (length(pos) == 0) {
    return(data.frame(pos = numeric(0), value = numeric(0)))
  }

  # Window starts step by `step_size`; the width is `window_size`, so
  # consecutive windows overlap whenever step_size < window_size.
  window_start <- seq(0, max(0, pos), by = step_size)
  window_end <- window_start + window_size

  mean_value <- vapply(seq_along(window_start), function(j) {
    in_window <- pos >= window_start[j] & pos < window_end[j]
    # An empty (or all-missing) window yields NaN and is dropped below.
    mean(value[in_window], na.rm = TRUE)
  }, numeric(1))

  keep <- !is.nan(mean_value)
  data.frame(pos = ((window_start + window_end) / 2)[keep],
             value = mean_value[keep])
}
# Draw one delta SNP-index panel per chromosome on a 3-row by 4-column grid.
par(mfrow = c(3, 4))
for (i in sprintf("chr%02d", 1:12)) {
  # Keep only the rows whose name matches the current chromosome label.
  on_chrom <- grepl(i, row.names(freq2))
  freq_flt <- freq2[on_chrom, ]

  # The coordinate is read from character 7 of the row name onwards
  # (presumably names look like "chrNN_<position>" - TODO confirm).
  pos <- as.numeric(substring(row.names(freq_flt), 7))

  # Per-site difference between the first and second columns of freq2.
  snp_index <- freq_flt[, 1] - freq_flt[, 2]

  # Windowed means used for the overlaid trend line.
  df <- calcValueByWindow(pos = pos, value = snp_index)

  plot(
    x = pos,
    y = snp_index,
    ylim = c(-1, 1),
    pch = 20,
    cex = 0.2,
    xlab = i,
    ylab = expression(paste(Delta, " " ,"SNP index"))
  )
  lines(df$pos, df$value, col = "red")
}
2.QTLseqr
# Install QTLseqr from GitHub only when it is not available yet, so that
# re-running the script does not reinstall the package every time.
if (!requireNamespace("QTLseqr", quietly = TRUE)) {
  devtools::install_github("bmansfeld/QTLseqr")
}
library(QTLseqr)
library(vcfR)
# The author recommends clearing the workspace before starting, to avoid
# errors caused by leftover variables.
rm(list = ls())
# Set the working directory; the relative paths used below resolve against it.
setwd("~/workspace/BSA/practice/")
# Load the SNP calls.
vcf <- read.vcfR("4.variants_filter/snps.vcf")
chrom <- getCHROM(vcf)
pos <- getPOS(vcf)
ref <- getREF(vcf)
alt <- getALT(vcf)

# Per-sample AD strings, split into their first and second record.
ad <- extract.gt(vcf, "AD")
ref_split <- masplit(ad, record = 1, sort = 0)
alt_split <- masplit(ad, record = 2, sort = 0)
gt <- extract.gt(vcf, "GT")

# Look the two bulk samples up by name instead of by hard-coded column
# position, so the depth columns cannot be silently mislabelled when the
# sample order in the VCF differs.
bulk_cols <- match(c("SRR6327817", "SRR6327818"), colnames(ad))
stopifnot(!anyNA(bulk_cols))

# Build a data frame in the layout expected by QTLseqr's importFromTable().
df <- data.frame(CHROM = chrom,
                 POS = pos,
                 REF = ref,
                 ALT = alt,
                 AD_REF.SRR6327817 = ref_split[, bulk_cols[1]],
                 AD_ALT.SRR6327817 = alt_split[, bulk_cols[1]],
                 AD_REF.SRR6327818 = ref_split[, bulk_cols[2]],
                 AD_ALT.SRR6327818 = alt_split[, bulk_cols[2]])

# Keep sites where SRR6327815 is not called "0/1" and SRR6327816 is called "0/1".
# NOTE(review): requiring a "0/1" call for SRR6327816 looks unusual for a
# parental filter - confirm that `== "0/1"` (rather than `!= "0/1"`) is intended.
mask <- which(gt[, "SRR6327815"] != "0/1" & gt[, "SRR6327816"] == "0/1")
df <- df[mask, ]

write.table(df, file = "rice.tsv", sep = "\t", row.names = FALSE, quote = FALSE)
# Read the table back through QTLseqr, restricted to chr01 ... chr12.
chrom_names <- sprintf("chr%02d", 1:12)
df <- importFromTable(
  "rice.tsv",
  highBulk = "SRR6327817",
  lowBulk = "SRR6327818",
  chromList = chrom_names,
  sep = "\t"
)

# Drop every row where either bulk has a missing SNP index.
has_both_indices <- !is.na(df$SNPindex.LOW) & !is.na(df$SNPindex.HIGH)
df <- df[has_both_indices, ]

# G' statistic.
df <- runGprimeAnalysis(
  SNPset = df,
  windowSize = 1e6,
  outlierFilter = "deltaSNP"
)

# Delta SNP-index confidence intervals.
df <- runQTLseqAnalysis(
  SNPset = df,
  windowSize = 1e6,
  popStruc = "RIL",
  bulkSize = c(20, 20)
)

# Plots: G' with its threshold, then delta SNP index with intervals.
plotQTLStats(SNPset = df, var = "Gprime", plotThreshold = TRUE, q = 0.01)
plotQTLStats(SNPset = df, var = "deltaSNP", plotIntervals = TRUE)
- ggplot2繪圖
跟著文獻(xiàn)里的圖畫(huà)的,努力在還原了…
# ggplot2 is not attached by any of the library() calls visible in this
# script, so attach it here before calling ggplot() and friends.
library(ggplot2)

ggplot(data = df, aes(x = POS, y = deltaSNP)) + # map the x and y axes
  geom_point(aes(color = as.factor(CHROM)), # colour the points by chromosome
             alpha = 0.8, size = 0.8, position = "jitter") +
  # 12 columns, free x scale per panel, facet labels placed at the bottom
  facet_wrap(~CHROM, ncol = 12, scales = "free_x", strip.position = 'bottom') +
  # Fitted line; se = FALSE removes the confidence band around it.
  geom_smooth(method = 'gam', fullrange = TRUE,
              size = 0.7, color = "black",
              se = FALSE) +
  ylim(0, 1) + # y-axis range
  ylab(expression(paste(Delta, " " ,"SNP index"))) + # y-axis label
  theme(
    legend.position = "none",
    panel.border = element_blank(),       # panel border
    panel.grid.major.x = element_blank(), # major grid lines
    panel.grid.minor.x = element_blank(), # minor grid lines
    panel.spacing.x = unit(0, "cm"),      # horizontal gap between facets
    strip.placement = "outside",          # facet labels outside the axes
    strip.background.x = element_rect(color = "white", fill = "white"), # white label background
    axis.text.x = element_blank()         # hide the x-axis tick labels
  )
這里的擬合線我是直接用geom_smooth畫的(上面代碼中的method是'gam'),但感覺還是1里的那個線比較好。
但是1里是分了12條染色體進行繪制的,我再想想怎么樣在這個圖里加上1里的擬合線。
2022.5.17更新
對數據處理改了一下,上面那個是按照binmapr算出來的deltaSNP畫的圖。
感覺結果與文獻里不太符合,換了一下參數設置(SRR17和SRR18換了個位置),然后就是加了一條y=0.5的水平虛線。
至于那個文章中的擬合曲線,還沒想到怎么樣可以加上去。待我再思考思考。