WGCNA-2實(shí)戰(zhàn)

FemaleLiver-Data為例

#設(shè)置工作目錄
setwd("D:/Desktop/WGCNA/FemaleLiver-Data/")
#加載包
library(WGCNA)
library(data.table)
library(stringr)
library(openxlsx)
allowWGCNAThreads()
options(stringsAsFactors = FALSE)

1.數(shù)據(jù)準(zhǔn)備

#讀取表達(dá)數(shù)據(jù)
femData <- read.csv("LiverFemale3600.csv")
datExpr0 = as.data.frame(t(femData[, -c(1:8)]));
names(datExpr0) = femData$substanceBXH;
rownames(datExpr0) = names(femData)[-c(1:8)];
#檢查數(shù)據(jù)
gsg = goodSamplesGenes(datExpr0, verbose = 3);
if (!gsg$allOK)
{
  # Optionally, print the gene and sample names that were removed:
  if (sum(!gsg$goodGenes)>0)
    printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
  if (sum(!gsg$goodSamples)>0)
    printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
  # Remove the offending genes and samples from the data:
  datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}
#構(gòu)建樣本距離樹
sampleTree = hclust(dist(datExpr0), method = "average");
#樣本聚類圖,查看是否有離群點(diǎn)
pdf(file = "sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
     cex.axis = 1.5, cex.main = 2)
abline(h = 15, col = "red");
dev.off();
Sample clustering to detect outliers
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
# clust 1 contains the samples we want to keep.
keepSamples = (clust==1)
datExpr = datExpr0[keepSamples, ]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

#讀取性狀數(shù)據(jù)
traitData = read.csv("ClinicalTraits.csv");
dim(traitData)
names(traitData)
# remove columns that hold information we do not need.
allTraits = traitData[, -c(31, 16)];
allTraits = allTraits[, c(2, 11:36) ];
dim(allTraits)
names(allTraits)
# Form a data frame analogous to expression data that will hold the clinical traits.
femaleSamples = rownames(datExpr);
traitRows = match(femaleSamples, allTraits$Mice);
datTraits = allTraits[traitRows, -1];
rownames(datTraits) = allTraits[traitRows, 1];
collectGarbage();
#重新構(gòu)建樣本距離樹
sampleTree2 = hclust(dist(datExpr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed=FALSE);
# Plot the sample dendrogram and the colors underneath.
pdf("Sample dendrogram and trait heatmap.pdf")
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits),
                    main = "Sample dendrogram and trait heatmap")
dev.off()
Sample dendrogram and trait heatmap

2.選擇合適的power值

# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
power=sft$powerEstimate
# Plot the results:
pdf("soft-thresholding.pdf")
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
soft-thresholding
#查看是否符合無尺度分布
pdf("k.pdf")
k <- softConnectivity(datExpr,power=power)
par(mfrow=c(1,2))
hist(k)
scaleFreePlot(k,main="Check Scale free topology\n")
dev.off()
k

3.構(gòu)建模塊

#one-step
net = blockwiseModules(datExpr, power = power,
                       TOMType = "unsigned", minModuleSize = 30,
                       reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = "femaleMouseTOM",
                       verbose = 3)
table(net$colors)
# open a graphics window
# Convert labels to colors for plotting
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)# Plot the dendrogram and the module colors underneath
MEs = orderMEs(net$MEs);
geneTree = net$dendrograms[[1]];
pdf("Module colors.pdf")
plotDendroAndColors(geneTree, moduleColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
dev.off()
Module colors

4.TOM結(jié)構(gòu)

TOM=TOMsimilarityFromExpr(datExpr, power = power);  
dissTOM = 1-TOM
plotTOM = dissTOM^power; 
diag(plotTOM) = NA; 
#TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#隨機(jī)選取部分基因作圖
nSelect = 400
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select];
# Open a graphical window
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^power;
diag(plotDiss) = NA;
pdf("Network heatmap plot, selected genes.pdf")
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
dev.off()
Network heatmap plot, selected genes

翰绊??這個顏色似乎是反過來的……求指點(diǎn)

#模塊間相似矩陣
pdf("Eigengene adjacency heatmap.pdf")
plotEigengeneNetworks(MEs, 
                      "Eigengene adjacency heatmap", 
                      marHeatmap = c(3,4,2,2), 
                      plotDendrograms = FALSE, 
                      xLabelsAngle = 90)
dev.off()
Eigengene adjacency heatmap

5.模塊-性狀關(guān)系圖尋找顯著性狀和模塊

moduleTraitCor<-cor(MEs,datTraits,use="p")
moduleTraitPvalue<-corPvalueStudent(moduleTraitCor,nSamples)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
# Display the correlation values within a heatmap plot
pdf("Module-trait relationships.pdf")
par(mar = c(6, 8.5, 3, 3));
labeledHeatmap(Matrix = moduleTraitCor,
                xLabels = names(datTraits),
                yLabels = names(MEs),
                ySymbols = names(MEs),
                colorLabels = FALSE,
                colors = blueWhiteRed(50),
                textMatrix = textMatrix,
                setStdMargins = FALSE,
                cex.text = 0.5,
                zlim = c(-1,1),
                main = paste("Module-trait relationships"))
dev.off()
Module-trait relationships
#可看出weight_g和brown相關(guān)性高形导,下面是不同方面進(jìn)一步驗(yàn)證
#(1)根據(jù)性狀與模塊特征向量基因的相關(guān)性及pvalue來挖掘與性狀相關(guān)的模塊
cor_ADR <- signif(WGCNA::cor(datTraits,MEs,use="p",method="pearson"),5)
p.values <- corPvalueStudent(cor_ADR,nSamples=nrow(datTraits))
Freq_MS_max_cor <- which.max(abs(cor_ADR["weight_g",-which(colnames(cor_ADR) == "MEgrey")]))
Freq_MS_max_p <- which.min(p.values["weight_g",-which(colnames(p.values) == "MEgrey")])

#(2)根據(jù)基因網(wǎng)絡(luò)顯著性鼻弧,也就是性狀與每個基因表達(dá)量相關(guān)性在各個模塊的均值作為該性狀在該模塊的顯著性,顯著性最大的那個模塊與該性狀最相關(guān)
GS1 <- as.numeric(WGCNA::cor(datTraits[,'weight_g'],datExpr,use="p",method="pearson"))
GeneSignificance <- abs(GS1)
ModuleSignificance <- tapply(GeneSignificance,moduleColors,mean,na.rm=T)
which.max(ModuleSignificance[names(ModuleSignificance)!="MEgrey"])

# Isolate weight from the clinical traits
weight = as.data.frame(datTraits$weight_g);
names(weight) = "weight"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, weight))
# Plot the relationships among the eigengenes and the trait
pdf("weight.pdf");
par(cex = 0.9)
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
= 90)
dev.off()
weight
# 從上圖可以看到MEbrown與weight相關(guān)
pdf("module heatmap and the eigengene.pdf")
which.module="brown"
ME=MEs[, paste("ME",which.module, sep="")]
par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
plotMat(t(scale(datExpr[,moduleColors==which.module ]) ),
        nrgcols=30,rlabels=F,rcols=which.module,
        main=which.module, cex.main=2)
par(mar=c(5, 4.2, 0, 0.7))
barplot(ME, col=which.module, main="", cex.main=2,
        ylab="eigengene expression",xlab="MPP")
dev.off()
module heatmap and the eigengene
## 模塊內(nèi)基因與表型數(shù)據(jù)關(guān)聯(lián)
# 性狀跟模塊雖然求出了相關(guān)性暇屋,可以挑選最相關(guān)的那些模塊來分析似袁,
# 但是模塊本身仍然包含非常多的基因,還需進(jìn)一步的尋找最重要的基因咐刨。
# 所有的模塊都可以跟基因算出相關(guān)系數(shù)昙衅,所有的連續(xù)型性狀也可以跟基因的表達(dá)
# 值算出相關(guān)系數(shù)。
# 如果跟性狀顯著相關(guān)基因也跟某個模塊顯著相關(guān)定鸟,那么這些基因可能就非常重要
# 而涉。
### 計算模塊與基因的相關(guān)性矩陣

corType="pearson"

if (corType=="pearsoon") {
  geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"))
  MMPvalue = as.data.frame(corPvalueStudent(
             as.matrix(geneModuleMembership), nSamples))
} else {
  geneModuleMembershipA = bicorAndPvalue(datExpr, MEs, robustY=ifelse(corType=="pearson",T,F))
  geneModuleMembership = geneModuleMembershipA$bicor
  MMPvalue   = geneModuleMembershipA$p
}

# 計算性狀與基因的相關(guān)性矩陣

## 只有連續(xù)型性狀才能進(jìn)行計算,如果是離散變量联予,在構(gòu)建樣品表時就轉(zhuǎn)為0-1矩陣婴谱。

if (corType=="pearsoon") {
  geneTraitCor = as.data.frame(cor(datExpr, datTraits, use = "p"))
  geneTraitP = as.data.frame(corPvalueStudent(
             as.matrix(geneTraitCor), nSamples))
} else {
  geneTraitCorA = bicorAndPvalue(datExpr, datTraits, robustY=ifelse(corType=="pearson",T,F))
  geneTraitCor = as.data.frame(geneTraitCorA$bicor)
  geneTraitP   = as.data.frame(geneTraitCorA$p)
}



# 最后把兩個相關(guān)性矩陣聯(lián)合起來,指定感興趣模塊進(jìn)行分析
module = "brown"
pheno = "weight_g"
modNames = substring(colnames(MEs), 3)
# 獲取關(guān)注的列
module_column = match(module, modNames)
pheno_column = match(pheno,colnames(datTraits))
# 獲取模塊內(nèi)的基因
moduleGenes = moduleColors == module
pdf("MMvsGS_brown.pdf")
par(mfrow = c(1,1))
# 與性狀高度相關(guān)的基因,也是與性狀相關(guān)的模型的關(guān)鍵基因
verboseScatterplot(abs(geneModuleMembership[moduleGenes, module_column]),
                   abs(geneTraitCor[moduleGenes, pheno_column]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = paste("Gene significance for", pheno),
                   main = paste("Module membership vs. gene significance\n"),
                   cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
dev.off()
MMvsGS_brown
ADJ1=abs(cor(datExpr,use="p"))^power
Alldegrees1=intramodularConnectivity(ADJ1, moduleColors)
colorlevels=unique(moduleColors)
pdf("MMvsGS_total.pdf")
par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
par(mar = c(4,5,3,1))
for (i in c(1:length(colorlevels)))
{
  whichmodule=colorlevels[[i]];
  restrict1 = (moduleColors==whichmodule);
  verboseScatterplot(Alldegrees1$kWithin[restrict1],
                     GeneSignificance[restrict1], col=moduleColors[restrict1],
                     main=whichmodule,
                     xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
}
dev.off()
MMvsGS_total
#篩選hub基因
datKME=signedKME(datExpr, MEs, outputColumnName="MM.")
FilterGenes= abs(GS1)> .2 & abs(datKME$MM.brown)>.8
dimnames(data.frame(datExpr))[[2]][FilterGenes]
trait_hubGenes<-colnames(datExpr)[FilterGenes]
pdf("hub_unsigned correlations.pdf")
plotNetworkHeatmap(datExpr,plotGenes = trait_hubGenes,networkType = "unsigned",useTOM = TRUE,power=power,main="unsigned correlations")
dev.off()
hub_unsigned correlations

6.模塊的導(dǎo)出

# Select module gene
genes = colnames(datExpr) ## 
inModule = (moduleColors==module);
modgenes = genes[inModule]; 
## 也是提取指定模塊的基因名
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modgenes, modgenes)
cyt = exportNetworkToCytoscape(
                                 modTOM,
                                 edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse="-"), ".txt", sep=""),
                                 nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse="-"), ".txt", sep=""),
                                 weighted = TRUE,
                                 threshold = 0.02,
                                 nodeNames = modgenes,
                                 nodeAttr = moduleColors[inModule]
);


## 導(dǎo)出樞紐基因到 Cytoscape 
hubGene_TOM <- TOM[FilterGenes,FilterGenes]
dimnames(hubGene_TOM) = list(colnames(datExpr)[FilterGenes],colnames(datExpr)[FilterGenes])
cyt = exportNetworkToCytoscape(hubGene_TOM, 
                                 edgeFile = paste("CytoscapeInput-hub-edges-", paste(module, collapse="-"), ".txt", sep=""), 
                                 nodeFile = paste("CytoscapeInput-hub-nodes-", paste(module, collapse="-"), ".txt", sep=""), 
                                 weighted = TRUE, 
                                 threshold = 0.02, 
                                 nodeNames = trait_hubGenes, 
                                 altNodeNames =trait_hubGenes, 
                                 nodeAttr = moduleColors[FilterGenes]
                                 )
最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
  • 序言:七十年代末躯泰,一起剝皮案震驚了整個濱河市,隨后出現(xiàn)的幾起案子华糖,更是在濱河造成了極大的恐慌麦向,老刑警劉巖,帶你破解...
    沈念sama閱讀 221,548評論 6 515
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件客叉,死亡現(xiàn)場離奇詭異诵竭,居然都是意外死亡,警方通過查閱死者的電腦和手機(jī)兼搏,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 94,497評論 3 399
  • 文/潘曉璐 我一進(jìn)店門卵慰,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人佛呻,你說我怎么就攤上這事裳朋。” “怎么了吓著?”我有些...
    開封第一講書人閱讀 167,990評論 0 360
  • 文/不壞的土叔 我叫張陵鲤嫡,是天一觀的道長送挑。 經(jīng)常有香客問我,道長暖眼,這世上最難降的妖魔是什么惕耕? 我笑而不...
    開封第一講書人閱讀 59,618評論 1 296
  • 正文 為了忘掉前任,我火速辦了婚禮诫肠,結(jié)果婚禮上司澎,老公的妹妹穿的比我還像新娘。我一直安慰自己栋豫,他們只是感情好挤安,可當(dāng)我...
    茶點(diǎn)故事閱讀 68,618評論 6 397
  • 文/花漫 我一把揭開白布。 她就那樣靜靜地躺著笼才,像睡著了一般漱受。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上骡送,一...
    開封第一講書人閱讀 52,246評論 1 308
  • 那天昂羡,我揣著相機(jī)與錄音,去河邊找鬼摔踱。 笑死虐先,一個胖子當(dāng)著我的面吹牛,可吹牛的內(nèi)容都是我干的派敷。 我是一名探鬼主播蛹批,決...
    沈念sama閱讀 40,819評論 3 421
  • 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼篮愉!你這毒婦竟也來了腐芍?” 一聲冷哼從身側(cè)響起,我...
    開封第一講書人閱讀 39,725評論 0 276
  • 序言:老撾萬榮一對情侶失蹤试躏,失蹤者是張志新(化名)和其女友劉穎猪勇,沒想到半個月后,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體颠蕴,經(jīng)...
    沈念sama閱讀 46,268評論 1 320
  • 正文 獨(dú)居荒郊野嶺守林人離奇死亡泣刹,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點(diǎn)故事閱讀 38,356評論 3 340
  • 正文 我和宋清朗相戀三年,在試婚紗的時候發(fā)現(xiàn)自己被綠了犀被。 大學(xué)時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片椅您。...
    茶點(diǎn)故事閱讀 40,488評論 1 352
  • 序言:一個原本活蹦亂跳的男人離奇死亡,死狀恐怖寡键,靈堂內(nèi)的尸體忽然破棺而出掀泳,到底是詐尸還是另有隱情,我是刑警寧澤,帶...
    沈念sama閱讀 36,181評論 5 350
  • 正文 年R本政府宣布开伏,位于F島的核電站膀跌,受9級特大地震影響,放射性物質(zhì)發(fā)生泄漏固灵。R本人自食惡果不足惜捅伤,卻給世界環(huán)境...
    茶點(diǎn)故事閱讀 41,862評論 3 333
  • 文/蒙蒙 一、第九天 我趴在偏房一處隱蔽的房頂上張望巫玻。 院中可真熱鬧丛忆,春花似錦、人聲如沸仍秤。這莊子的主人今日做“春日...
    開封第一講書人閱讀 32,331評論 0 24
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽诗力。三九已至凰浮,卻和暖如春,著一層夾襖步出監(jiān)牢的瞬間苇本,已是汗流浹背袜茧。 一陣腳步聲響...
    開封第一講書人閱讀 33,445評論 1 272
  • 我被黑心中介騙來泰國打工, 沒想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留瓣窄,地道東北人笛厦。 一個月前我還...
    沈念sama閱讀 48,897評論 3 376
  • 正文 我出身青樓,卻偏偏與公主長得像俺夕,于是被迫代替她去往敵國和親裳凸。 傳聞我的和親對象是個殘疾皇子,可洞房花燭夜當(dāng)晚...
    茶點(diǎn)故事閱讀 45,500評論 2 359

推薦閱讀更多精彩內(nèi)容