R語言手動計算Tajima's D 和 Fst

原理和公式參考：

http://www.reibang.com/p/c7ba1aa543fe
wikipedia： https://en.wikipedia.org/wiki/Tajima%27s_D
https://www.google.com.hk/search?q=pvalue+for+tajima%27s+D&newwindow=1&client=safari&ei=URDsYZy_OZavoATIyJSYAg&ved=0ahUKEwjcq5ngxMX1AhWWF4gKHUgkBSMQ4dUDCA0&uact=5&oq=pvalue+for+tajima%27s+D&gs_lcp=Cgdnd3Mtd2l6EAMyBwgAEEcQsAMyBwgAEEcQsAMyBwgAEEcQsAMyBwgAEEcQsAMyBwgAEEcQsAMyBwgAEEcQsAMyBwgAEEcQsAMyBwgAEEcQsAMyBwgAEEcQsANKBAhBGABKBQhAEgExSgQIRhgAUABYAGCb0QRoAXABeACAAQCIAQCSAQCYAQDIAQnAAQE&sclient=gws-wiz
https://zhuanlan.zhihu.com/p/52064863

1. 加載數(shù)據(jù)

##### load data
data<-read.table("mydata.genotype",header=T) #### change this
data<-data[data$Chr=="chr18"] #### select car
data1<-data[,4:dim(data)[2]]

看一眼data

2. 設(shè)定參數(shù)和兩兩個體的排列組合

nindiv<-dim(data1)[2]
nsnp<-dim(data1)[1]
pos_data<-data[,1:2] #### change this 
pop1_num<-1:9  ###### change this （前9個是population1缤骨，后面是population2，
                ######這個只用于計算Fst尺借，對tajima‘s D沒有用）
pop2_num<-10:17  ##### change this
between_or_within_pop<-c()
for(i in 1:(nindiv-1)){
    for(k in (i+1):nindiv){
            print(paste0(i," and ",k))
            if((i %in% pop1_num & k %in% pop2_num) | (i %in% pop2_num & k %in% pop1_num)){
                between_or_within_pop<-c(between_or_within_pop,"between")
        }else{between_or_within_pop<-c(between_or_within_pop,"within")}}
}
#### 17個個體绊起，兩兩組合類型一共136種。
#### 寫清楚哪些是群體間的燎斩，哪些是群體內(nèi)的（Fst計算要用）
#### save between pop pairs num and within pop pair num
nbetween<-sum(between_or_within_pop=="between")
nwithin<-sum(between_or_within_pop=="within")

3. 定義計算函數(shù)

##### define function
tajima_and_fst<-function(input_data){
    theta_matrix<-matrix(nrow=dim(input_data)[1],ncol=choose(nindiv,2),0)
    for(snp in 1:dim(input_data)[1]){
        array<-input_data[snp,]
        pair_count=0
        for(i in 1:(nindiv-1)){
            for(k in (i+1):nindiv){
                    pair_count=pair_count+1
                    if(array[i][1,1]!=array[k][1,1]){
                        if (array[i][1,1]=="-"|array[k][1,1]=="-"){
                            theta_matrix[snp,pair_count]=NA
                        }else{theta_matrix[snp,pair_count]=1}
                    }
            }
        }
    }
    theta_matrix1<-theta_matrix
    theta_matrix1[is.na(theta_matrix)]=0
    #### theta_pi是考慮到NA值的虱歪，使用了總位點數(shù)/非gap位點數(shù) 作為標(biāo)準(zhǔn)化常數(shù)
    nromalized_factor<-dim(theta_matrix1)[1]/apply(!is.na(theta_matrix),2,sum)
    nromalized_factor[nromalized_factor==Inf]<-1
    
    a1<-sum(1/1:(dim(input_data)[1]-1))
    a2<-sum(1/(1:(dim(input_data)[1]-1))**2)
    c1<-b1-1/a1
    c2<-b2-((nindiv+2)/(a1*nindiv))+(a2/a1**2)
    e1<-c1/a1
    e2<-c2/(a1**2+a2)
    S<-sum(apply(theta_matrix1,1,sum)!=0)
        
    theta_pi<-sum(apply(theta_matrix1,2,sum)*nromalized_factor)/
        (combination_times-sum(apply(!is.na(theta_matrix),2,sum)==0)
        )
    theta_w<-S/a1
    var_d<-(e1*S+e2*S*(S-1))**(1/2)
    
    tajimasd<-(theta_pi-theta_w)/var_d
    
    theta_pi_between<-
        sum(apply(theta_matrix1[,between_or_within_pop=="between"],2,sum)*nromalized_factor)/nbetween
    theta_pi_within<-
        sum(apply(theta_matrix1[,between_or_within_pop=="within"],2,sum)*nromalized_factor)/nbetween
    fst<-(theta_pi_between-theta_pi_within)/theta_pi_between
    return(list(tajimasd,fst))
}

4. 定義掃描基因組的sliding window的參數(shù)

##### define sliding window parameters
window_size=20 #### change this
step_wise=1  #### change this
combination_times<-choose(nindiv,2)
b1<-nindiv+1/3*(nindiv-1)
b2<-(2*(nindiv**2+nindiv+3))/(9*nindiv*(nindiv-1))
total_step=(nsnp-window_size)/step_wise
tajimad_list<-c()
fst_list<-c()
window_position<-matrix(nrow=total_step,ncol=2)

5. 啟動 window sliding

##### run sliding windows
for(pace in 1:total_step){
    print(pace/total_step)
    start<-1+step_wise*(pace-1)
    end<-1+step_wise*(pace-1)+window_size
    res<-tajima_and_fst(data1[start:end,])  #### using the function
    new_d<-res[[1]] ### saving result of D
    new_fst<-res[[2]] ### saving result of fst
    tajimad_list<-c(tajimad_list,new_d)
    fst_list<-c(fst_list,new_fst)
    window_position[pace,]<-c(start,end)
}

6. 儲存結(jié)果，畫圖

#### concate output result
out<-data.frame(chr=pos_data[,1][1],
                snp_position_start=pos_data[,2][window_position[,1]],
                snp_position_end=pos_data[,2][window_position[,2]],
                relative_start=window_position[,1],
                relative_end=window_position[,2],
                tajimasd=(tajimad_list-mean(tajimad_list))/sd(tajimad_list),  #### 這里標(biāo)準(zhǔn)化栅表、中心化了
                fst=(fst_list-mean(fst_list))/sd(fst_list))  #### 這里標(biāo)準(zhǔn)化笋鄙、中心化了

write.table(out,"chr18.tajimasd.fst.txt",sep="\t",quote = F,row.names = F)
head(out)

看一眼結(jié)果

7. 畫圖

library(ggplot2)
ggplot(data=out)+
    geom_point(aes(x=(snp_position_start+snp_position_end)/2,y=fst,color="fst"))
ggplot(data=out)+
geom_point(aes(x=(snp_position_start+snp_position_end)/2,y=tajimasd,color="tajimasd2"))

Tajima's D

Fst

暫時還沒搞懂怎么計算P值...好像要序列模擬，比較麻煩怪瓶，后面再思考萧落。或者套個正態(tài)分布看離群值？但這樣有點武斷找岖，而且沒有假設(shè)檢驗陨倡。

Tajima原文中Tajimas' D和計算機序列模擬的比對結(jié)果。因此絕對值大于2應(yīng)該睡rule of thumb许布，應(yīng)該就算顯著兴革。

最后編輯于：2022.02.09 16:38:32

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者

人面猴
序言：七十年代末，一起剝皮案震驚了整個濱河市蜜唾，隨后出現(xiàn)的幾起案子杂曲，更是在濱河造成了極大的恐慌，老刑警劉巖灵妨，帶你破解...
沈念sama閱讀 206,839評論 6贊 482
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件解阅，死亡現(xiàn)場離奇詭異，居然都是意外死亡泌霍，警方通過查閱死者的電腦和手機货抄，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 88,543評論 2贊 382
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進店門，熙熙樓的掌柜王于貴愁眉苦臉地迎上來朱转，“玉大人蟹地，你說我怎么就攤上這事√傥” “怎么了怪与？”我有些...
開封第一講書人閱讀 153,116評論 0贊 344
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵，是天一觀的道長缅疟。經(jīng)常有香客問我分别，道長，這世上最難降的妖魔是什么存淫？我笑而不...
開封第一講書人閱讀 55,371評論 1贊 279
?港島之戀（遺憾婚禮）
正文為了忘掉前任耘斩，我火速辦了婚禮，結(jié)果婚禮上桅咆，老公的妹妹穿的比我還像新娘括授。我一直安慰自己，他們只是感情好岩饼，可當(dāng)我...
茶點故事閱讀 64,384評論 5贊 374
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布荚虚。她就那樣靜靜地躺著，像睡著了一般籍茧。火紅的嫁衣襯著肌膚如雪版述。梳的紋絲不亂的頭發(fā)上，一...
開封第一講書人閱讀 49,111評論 1贊 285
城市分裂傳說
那天寞冯，我揣著相機與錄音院水，去河邊找鬼腊徙。笑死，一個胖子當(dāng)著我的面吹牛檬某，可吹牛的內(nèi)容都是我干的撬腾。我是一名探鬼主播，決...
沈念sama閱讀 38,416評論 3贊 400
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼恢恼，長吁一口氣：“原來是場噩夢啊……” “哼民傻！你這毒婦竟也來了？” 一聲冷哼從身側(cè)響起场斑，我...
開封第一講書人閱讀 37,053評論 0贊 259
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤漓踢，失蹤者是張志新（化名）和其女友劉穎，沒想到半個月后漏隐，有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體喧半，經(jīng)...
沈念sama閱讀 43,558評論 1贊 300
?護林員之死
正文獨居荒郊野嶺守林人離奇死亡，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點故事閱讀 36,007評論 2贊 325
?白月光啟示錄
正文我和宋清朗相戀三年青责，在試婚紗的時候發(fā)現(xiàn)自己被綠了挺据。大學(xué)時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
茶點故事閱讀 38,117評論 1贊 334
活死人
序言：一個原本活蹦亂跳的男人離奇死亡脖隶，死狀恐怖扁耐，靈堂內(nèi)的尸體忽然破棺而出，到底是詐尸還是另有隱情产阱，我是刑警寧澤婉称，帶...
沈念sama閱讀 33,756評論 4贊 324
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布，位于F島的核電站构蹬，受9級特大地震影響王暗，放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜庄敛，卻給世界環(huán)境...
茶點故事閱讀 39,324評論 3贊 307
男人毒藥：我在死后第九天來索命
文/蒙蒙一瘫筐、第九天我趴在偏房一處隱蔽的房頂上張望。院中可真熱鬧铐姚，春花似錦、人聲如沸肛捍。這莊子的主人今日做“春日...
開封第一講書人閱讀 30,315評論 0贊 19
一樁弒父案，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽拙毫。三九已至依许，卻和暖如春，著一層夾襖步出監(jiān)牢的瞬間缀蹄，已是汗流浹背峭跳。一陣腳步聲響...
開封第一講書人閱讀 31,539評論 1贊 262
情欲美人皮
我被黑心中介騙來泰國打工膘婶，沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留，地道東北人蛀醉。一個月前我還...
沈念sama閱讀 45,578評論 2贊 355
代替公主和親
正文我出身青樓悬襟，卻偏偏與公主長得像，于是被迫代替她去往敵國和親拯刁。傳聞我的和親對象是個殘疾皇子脊岳，可洞房花燭夜當(dāng)晚...
茶點故事閱讀 42,877評論 2贊 345