#設(shè)置工作目錄
setwd("D:/Desktop/WGCNA/FemaleLiver-Data/")
#加載包
library(WGCNA)
library(data.table)
library(stringr)
library(openxlsx)
allowWGCNAThreads()
options(stringsAsFactors = FALSE)
1.數(shù)據(jù)準(zhǔn)備
#讀取表達(dá)數(shù)據(jù)
femData <- read.csv("LiverFemale3600.csv")
datExpr0 = as.data.frame(t(femData[, -c(1:8)]));
names(datExpr0) = femData$substanceBXH;
rownames(datExpr0) = names(femData)[-c(1:8)];
#檢查數(shù)據(jù)
gsg = goodSamplesGenes(datExpr0, verbose = 3);
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}
#構(gòu)建樣本距離樹
sampleTree = hclust(dist(datExpr0), method = "average");
#樣本聚類圖,查看是否有離群點(diǎn)
pdf(file = "sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
abline(h = 15, col = "red");
dev.off();
Sample clustering to detect outliers
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
# clust 1 contains the samples we want to keep.
keepSamples = (clust==1)
datExpr = datExpr0[keepSamples, ]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
#讀取性狀數(shù)據(jù)
traitData = read.csv("ClinicalTraits.csv");
dim(traitData)
names(traitData)
# remove columns that hold information we do not need.
allTraits = traitData[, -c(31, 16)];
allTraits = allTraits[, c(2, 11:36) ];
dim(allTraits)
names(allTraits)
# Form a data frame analogous to expression data that will hold the clinical traits.
femaleSamples = rownames(datExpr);
traitRows = match(femaleSamples, allTraits$Mice);
datTraits = allTraits[traitRows, -1];
rownames(datTraits) = allTraits[traitRows, 1];
collectGarbage();
#重新構(gòu)建樣本距離樹
sampleTree2 = hclust(dist(datExpr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed=FALSE);
# Plot the sample dendrogram and the colors underneath.
pdf("Sample dendrogram and trait heatmap.pdf")
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
dev.off()
Sample dendrogram and trait heatmap
2.選擇合適的power值
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
power=sft$powerEstimate
# Plot the results:
pdf("soft-thresholding.pdf")
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
soft-thresholding
#查看是否符合無尺度分布
pdf("k.pdf")
k <- softConnectivity(datExpr,power=power)
par(mfrow=c(1,2))
hist(k)
scaleFreePlot(k,main="Check Scale free topology\n")
dev.off()
k
3.構(gòu)建模塊
#one-step
net = blockwiseModules(datExpr, power = power,
TOMType = "unsigned", minModuleSize = 30,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "femaleMouseTOM",
verbose = 3)
table(net$colors)
# open a graphics window
# Convert labels to colors for plotting
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)# Plot the dendrogram and the module colors underneath
MEs = orderMEs(net$MEs);
geneTree = net$dendrograms[[1]];
pdf("Module colors.pdf")
plotDendroAndColors(geneTree, moduleColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()
Module colors
4.TOM結(jié)構(gòu)
TOM=TOMsimilarityFromExpr(datExpr, power = power);
dissTOM = 1-TOM
plotTOM = dissTOM^power;
diag(plotTOM) = NA;
#TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#隨機(jī)選取部分基因作圖
nSelect = 400
# For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select];
# Open a graphical window
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
# the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^power;
diag(plotDiss) = NA;
pdf("Network heatmap plot, selected genes.pdf")
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
dev.off()
Network heatmap plot, selected genes
翰绊??這個顏色似乎是反過來的……求指點(diǎn)
#模塊間相似矩陣
pdf("Eigengene adjacency heatmap.pdf")
plotEigengeneNetworks(MEs,
"Eigengene adjacency heatmap",
marHeatmap = c(3,4,2,2),
plotDendrograms = FALSE,
xLabelsAngle = 90)
dev.off()
Eigengene adjacency heatmap
5.模塊-性狀關(guān)系圖尋找顯著性狀和模塊
moduleTraitCor<-cor(MEs,datTraits,use="p")
moduleTraitPvalue<-corPvalueStudent(moduleTraitCor,nSamples)
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
# Display the correlation values within a heatmap plot
pdf("Module-trait relationships.pdf")
par(mar = c(6, 8.5, 3, 3));
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
dev.off()
Module-trait relationships
#可看出weight_g和brown相關(guān)性高形导,下面是不同方面進(jìn)一步驗(yàn)證
#(1)根據(jù)性狀與模塊特征向量基因的相關(guān)性及pvalue來挖掘與性狀相關(guān)的模塊
cor_ADR <- signif(WGCNA::cor(datTraits,MEs,use="p",method="pearson"),5)
p.values <- corPvalueStudent(cor_ADR,nSamples=nrow(datTraits))
Freq_MS_max_cor <- which.max(abs(cor_ADR["weight_g",-which(colnames(cor_ADR) == "MEgrey")]))
Freq_MS_max_p <- which.min(p.values["weight_g",-which(colnames(p.values) == "MEgrey")])
#(2)根據(jù)基因網(wǎng)絡(luò)顯著性鼻弧,也就是性狀與每個基因表達(dá)量相關(guān)性在各個模塊的均值作為該性狀在該模塊的顯著性,顯著性最大的那個模塊與該性狀最相關(guān)
GS1 <- as.numeric(WGCNA::cor(datTraits[,'weight_g'],datExpr,use="p",method="pearson"))
GeneSignificance <- abs(GS1)
ModuleSignificance <- tapply(GeneSignificance,moduleColors,mean,na.rm=T)
which.max(ModuleSignificance[names(ModuleSignificance)!="MEgrey"])
# Isolate weight from the clinical traits
weight = as.data.frame(datTraits$weight_g);
names(weight) = "weight"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, weight))
# Plot the relationships among the eigengenes and the trait
pdf("weight.pdf");
par(cex = 0.9)
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
= 90)
dev.off()
weight
# 從上圖可以看到MEbrown與weight相關(guān)
pdf("module heatmap and the eigengene.pdf")
which.module="brown"
ME=MEs[, paste("ME",which.module, sep="")]
par(mfrow=c(2,1), mar=c(0.3, 5.5, 3, 2))
plotMat(t(scale(datExpr[,moduleColors==which.module ]) ),
nrgcols=30,rlabels=F,rcols=which.module,
main=which.module, cex.main=2)
par(mar=c(5, 4.2, 0, 0.7))
barplot(ME, col=which.module, main="", cex.main=2,
ylab="eigengene expression",xlab="MPP")
dev.off()
module heatmap and the eigengene
## 模塊內(nèi)基因與表型數(shù)據(jù)關(guān)聯(lián)
# 性狀跟模塊雖然求出了相關(guān)性暇屋,可以挑選最相關(guān)的那些模塊來分析似袁,
# 但是模塊本身仍然包含非常多的基因,還需進(jìn)一步的尋找最重要的基因咐刨。
# 所有的模塊都可以跟基因算出相關(guān)系數(shù)昙衅,所有的連續(xù)型性狀也可以跟基因的表達(dá)
# 值算出相關(guān)系數(shù)。
# 如果跟性狀顯著相關(guān)基因也跟某個模塊顯著相關(guān)定鸟,那么這些基因可能就非常重要
# 而涉。
### 計算模塊與基因的相關(guān)性矩陣
corType="pearson"
if (corType=="pearsoon") {
geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"))
MMPvalue = as.data.frame(corPvalueStudent(
as.matrix(geneModuleMembership), nSamples))
} else {
geneModuleMembershipA = bicorAndPvalue(datExpr, MEs, robustY=ifelse(corType=="pearson",T,F))
geneModuleMembership = geneModuleMembershipA$bicor
MMPvalue = geneModuleMembershipA$p
}
# 計算性狀與基因的相關(guān)性矩陣
## 只有連續(xù)型性狀才能進(jìn)行計算,如果是離散變量联予,在構(gòu)建樣品表時就轉(zhuǎn)為0-1矩陣婴谱。
if (corType=="pearsoon") {
geneTraitCor = as.data.frame(cor(datExpr, datTraits, use = "p"))
geneTraitP = as.data.frame(corPvalueStudent(
as.matrix(geneTraitCor), nSamples))
} else {
geneTraitCorA = bicorAndPvalue(datExpr, datTraits, robustY=ifelse(corType=="pearson",T,F))
geneTraitCor = as.data.frame(geneTraitCorA$bicor)
geneTraitP = as.data.frame(geneTraitCorA$p)
}
# 最后把兩個相關(guān)性矩陣聯(lián)合起來,指定感興趣模塊進(jìn)行分析
module = "brown"
pheno = "weight_g"
modNames = substring(colnames(MEs), 3)
# 獲取關(guān)注的列
module_column = match(module, modNames)
pheno_column = match(pheno,colnames(datTraits))
# 獲取模塊內(nèi)的基因
moduleGenes = moduleColors == module
pdf("MMvsGS_brown.pdf")
par(mfrow = c(1,1))
# 與性狀高度相關(guān)的基因,也是與性狀相關(guān)的模型的關(guān)鍵基因
verboseScatterplot(abs(geneModuleMembership[moduleGenes, module_column]),
abs(geneTraitCor[moduleGenes, pheno_column]),
xlab = paste("Module Membership in", module, "module"),
ylab = paste("Gene significance for", pheno),
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
dev.off()
MMvsGS_brown
ADJ1=abs(cor(datExpr,use="p"))^power
Alldegrees1=intramodularConnectivity(ADJ1, moduleColors)
colorlevels=unique(moduleColors)
pdf("MMvsGS_total.pdf")
par(mfrow=c(2,as.integer(0.5+length(colorlevels)/2)))
par(mar = c(4,5,3,1))
for (i in c(1:length(colorlevels)))
{
whichmodule=colorlevels[[i]];
restrict1 = (moduleColors==whichmodule);
verboseScatterplot(Alldegrees1$kWithin[restrict1],
GeneSignificance[restrict1], col=moduleColors[restrict1],
main=whichmodule,
xlab = "Connectivity", ylab = "Gene Significance", abline = TRUE)
}
dev.off()
MMvsGS_total
#篩選hub基因
datKME=signedKME(datExpr, MEs, outputColumnName="MM.")
FilterGenes= abs(GS1)> .2 & abs(datKME$MM.brown)>.8
dimnames(data.frame(datExpr))[[2]][FilterGenes]
trait_hubGenes<-colnames(datExpr)[FilterGenes]
pdf("hub_unsigned correlations.pdf")
plotNetworkHeatmap(datExpr,plotGenes = trait_hubGenes,networkType = "unsigned",useTOM = TRUE,power=power,main="unsigned correlations")
dev.off()
hub_unsigned correlations
6.模塊的導(dǎo)出
# Select module gene
genes = colnames(datExpr) ##
inModule = (moduleColors==module);
modgenes = genes[inModule];
## 也是提取指定模塊的基因名
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modgenes, modgenes)
cyt = exportNetworkToCytoscape(
modTOM,
edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse="-"), ".txt", sep=""),
weighted = TRUE,
threshold = 0.02,
nodeNames = modgenes,
nodeAttr = moduleColors[inModule]
);
## 導(dǎo)出樞紐基因到 Cytoscape
hubGene_TOM <- TOM[FilterGenes,FilterGenes]
dimnames(hubGene_TOM) = list(colnames(datExpr)[FilterGenes],colnames(datExpr)[FilterGenes])
cyt = exportNetworkToCytoscape(hubGene_TOM,
edgeFile = paste("CytoscapeInput-hub-edges-", paste(module, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-hub-nodes-", paste(module, collapse="-"), ".txt", sep=""),
weighted = TRUE,
threshold = 0.02,
nodeNames = trait_hubGenes,
altNodeNames =trait_hubGenes,
nodeAttr = moduleColors[FilterGenes]
)