使用WGCNA分析單細胞轉錄組數(shù)據(jù)(懶人版) 2023-05-18

背景介紹

WGCNA是權重基因共表達網(wǎng)絡分析,是常規(guī)轉錄組和單細胞轉錄組的重要分析內容之一,能找到與表型相關聯(lián)的基因模塊技俐,所謂基因模塊其實就是基因集,不過在WGCNA中是有共表達關系的基因集统台。本篇博客介紹WGCNA在單細胞轉錄組中的應用
(這js是不是還有字數(shù)限制啊雕擂,寫了三遍了直秆,一發(fā)布就丟失了保存的內容辑鲤,嗚嗚嗚,一直保存不了舟铜,有毒啊贵扰,而且越來愈多廣告了仇穗!嗐~)


主函數(shù)seurat_wgcna

obj是Seurat對象,
group.by計算假細胞的列名,
size計算假細胞使用的細胞數(shù),例如設置為10表示將10個細胞合并成一個假細胞,
slot使用的數(shù)據(jù)槽戚绕,默認為'data'纹坐,一般不用改,
assay使用的assay,默認為'RNA',
fun計算假細胞使用的計算函數(shù)舞丛,默認為mean,
WGCNA的參數(shù)耘子,一般不用改type = "unsigned",corType = "pearson", pct.mad=0.75,
label輸出文件的標簽,默認為'test',
trait性狀信息球切,默認為NULL谷誓,即meta.data,
使用線程數(shù)nThreads,默認為10欧聘。

  • 使用實例
obj <- readRDS('test.rds')
seurat_wgcna(obj,group.by='celltype',size=100,slot='data',assay='Spatial',fun=mean,
                         type = "unsigned",corType = "pearson", pct.mad=0.75, label='test,trait=NULL,nThreads=20)

所有代碼如下

基本函數(shù)

library(Seurat)
library(data.table)
library(dplyr)

condense_cell <- function(obj,group.by=NULL,size=NULL,slot='data',assay='RNA',fun=mean){
mat <- data.table(t(as.matrix(slot(obj@assays[[assay]],slot))),keep.rownames=T)
meta <- obj@meta.data
meta$Celltype <- meta[,group.by]

cellist <- rownames(meta)

lcod <- c()
lscell <- c()
for (i in unique(meta$Celltype)) {
c1 <- which(meta$Celltype==i)
cellist1 <- cellist[c1]
cod <- floor(seq_along(cellist1)/size)
t1 <- data.frame(table(cod))
t1[,1] <- as.vector(t1[,1])
t1[,2] <- as.vector(t1[,2])
c2 <- which(t1[,2]<size)
short <-t1[,1][c2]
c3 <- which(cod %in% short)
cod[c3] <- t1[,1][which(t1[,2]>=size)[1]]
cod <- paste0(i,"_Cell",cod)
scell <- sample(cellist1)
lcod <- c(lcod,cod)
lscell <- c(lscell,scell)
}

map_df <- data.frame(lscell,lcod)
colnames(map_df) <- c("old_id","new_id")
rownames(map_df) <- map_df$old_id
map_df <- map_df[mat$rn,]
mat$rn <- map_df$new_id

mat <- mat[,lapply(.SD,fun), by=rn]
rownames(mat) <- mat$rn
gename <- colnames(mat)
celname <- rownames(mat)
mat <- t(mat[,rn:=NULL])
colnames(mat) <- celname
rownames(mat) <- gename
obj <- CreateSeuratObject(mat)
return(obj)
}

library(igraph)
plot_network <- function(edges,nodes=NULL,group.by=NULL,weight=NULL,coul = NULL,layout=layout.sphere,pf=NULL,main=""){
if (!is.null(weight)) {
edges$weight <- edges[,weight]
}else{
edges$weight <- 1
}
if (!is.null(group.by)) {
nodes$group <- nodes[,group.by]
}else{
nodes$group <- 'group1'
}
network <- graph_from_data_frame(d=edges, vertices=nodes, directed=F)
library(RColorBrewer)
nlen <- length(unique(V(network)$group))

if (is.null(coul)) {
library(RColorBrewer)
coul  <- c(brewer.pal(8, "Dark2"),brewer.pal(12, "Paired"),brewer.pal(8, "Set2"),brewer.pal(9, "Set1"))
}
coul <- coul[1:nlen]
vertex.color <- coul[as.numeric(as.factor(V(network)$group))]

pdf(paste0(pf,'_',group.by,'_network.pdf'),18,10)
plot(network, vertex.color=vertex.color, edge.width=E(network)$weight*2,layout=layout,main=main)
legend("topright", legend=levels(as.factor(V(network)$group)),
       col = coul, bty = "n", pch=20 , pt.cex = 3,
       cex = 1.5, text.col=coul , horiz = FALSE,
       inset = c(0.1, 0.1))
dev.off()
}

WGCNA分析流程函數(shù)

library(WGCNA)
library(reshape2)
library(stringr)
options(stringsAsFactors = FALSE)
enableWGCNAThreads()

run_wgcna <- function(mat,type = "unsigned",corType = "pearson", pct.mad=0.75, label='test',trait=NULL,tom=F,RsquaredCut=0.85,hug.top=50,power=NULL){
corFnc = ifelse(corType=="pearson", cor, bicor)
maxPOutliers = ifelse(corType=="pearson",1,0.05)
robustY = ifelse(corType=="pearson",T,F)

dataExpr <- mat

#Data QC
##################################################
m.mad <- apply(dataExpr,1,mad)
dataExprVar <- as.matrix(dataExpr[which(m.mad > max(quantile(m.mad, probs=seq(0, 1, 1-pct.mad))[2],0.01)),])
dataExpr <- as.data.frame(t(dataExprVar))

gsg = goodSamplesGenes(dataExpr, verbose = 3)

if (!gsg$allOK){
  if (sum(!gsg$goodGenes)>0)
    printFlush(paste("Removing genes:",
                     paste(names(dataExpr)[!gsg$goodGenes], collapse = ",")));
  if (sum(!gsg$goodSamples)>0)
    printFlush(paste("Removing samples:",
                     paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ",")));
  dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
}

nGenes = ncol(dataExpr)
nSamples = nrow(dataExpr)

sampleTree = hclust(dist(dataExpr), method = "average")
pdf('fig1.sampleTree.pdf',18,10)
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="")
dev.off()
##################################################

#calculate power
##################################################
powers = c(c(1:10), seq(from = 12, to=30, by=2))
sft = pickSoftThreshold(dataExpr, powerVector=powers,networkType=type, verbose=5, RsquaredCut=RsquaredCut)

pdf('fig2.powers.pdf',18,10)
par(mfrow = c(1,2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",
     ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red")
abline(h=0.85,col="red")
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers,
     cex=cex1, col="red")
dev.off()
saveRDS(sft,'sft.rds')
saveRDS(dataExpr,'dataExpr.rds')
if (is.null(power)) {
power = sft$powerEstimate
}
print(paste0('The power value is : ',power))
if (is.na(power)){
  power = ifelse(nSamples<20, ifelse(type == "unsigned", 9, 18),
          ifelse(nSamples<30, ifelse(type == "unsigned", 8, 16),
          ifelse(nSamples<40, ifelse(type == "unsigned", 7, 14),
          ifelse(type == "unsigned", 6, 12))
          )
          )
}
##################################################

#bulid co-expression network
##################################################
net = blockwiseModules(dataExpr, power = power, maxBlockSize = nGenes,
                       TOMType = type, minModuleSize = 30,
                       reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs=TRUE, corType = corType,
                       maxPOutliers=maxPOutliers, loadTOMs=TRUE,
                       saveTOMFileBase = paste0(label, ".tom"),
                       verbose = 3)

moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)

pdf('fig3.plotDendroAndColors.pdf',18,10)
plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
dev.off()

MEs = net$MEs
MEs_col = MEs
colnames(MEs_col) = paste0("ME", labels2colors(
  as.numeric(str_replace_all(colnames(MEs),"ME",""))))
MEs_col = orderMEs(MEs_col)


pdf('fig4.plotEigengeneNetworks.pdf',18,10)
plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",
                      marDendro = c(3,3,2,4),
                      marHeatmap = c(3,4,2,2), plotDendrograms = T,
                      xLabelsAngle = 90)
dev.off()
##################################################
#TOMplot
#TOM = TOMsimilarityFromExpr(dataExpr, power=power, corType=corType, networkType=type)
load(net$TOMFiles[1], verbose=T)
TOM <- as.matrix(TOM)
dissTOM = 1-TOM
plotTOM = dissTOM^7
diag(plotTOM) = NA

if (tom) {
pdf('fig5.TOMplot.pdf',18,10)
TOMplot(plotTOM, net$dendrograms, moduleColors,
                main = "Network heatmap plot, all genes")
dev.off()
}
##################################################
#export network information
##################################################
probes = colnames(dataExpr)
dimnames(TOM) <- list(probes, probes)

cyt = exportNetworkToCytoscape(TOM,
             edgeFile = paste(label, ".edges.txt", sep=""),
             nodeFile = paste(label, ".nodes.txt", sep=""),
             weighted = TRUE, threshold = 0,
             nodeNames = probes, nodeAttr = moduleColors)

edges <- read.table(paste(label, ".edges.txt", sep=""),sep='\t',header=T)
nodes <- read.table(paste(label, ".nodes.txt", sep=""),sep='\t',header=T)

library(RColorBrewer)
coul<-c(brewer.pal(8, "Dark2"),brewer.pal(12, "Paired"),brewer.pal(8, "Set2"),brewer.pal(9, "Set1"))

colnames(nodes) <- c(colnames(nodes)[1:2],'group')
lgroup <- unique(nodes$group)
lgroup <- lgroup[-grep('grey',lgroup)]
for (i in lgroup) {
nodes1 <- subset(nodes,group==i)
c1 <- which(edges[,1] %in% nodes1[,1])
edges1 <- edges[c1,]
c1 <- which(edges1[,2] %in% nodes1[,1])
edges1 <- edges1[c1,]
mtitle <- dim(nodes1)[1]
plot_network(edges=edges1,nodes=nodes1,group.by=NULL,weight='weight',coul = coul,layout=layout.sphere,pf=paste0("all_genes_",i),main=mtitle)
}

##################################################
#get hub genes
##################################################

library(dplyr)
edgeData1 <- cyt$edgeData[,c(1,2,3)]
edgeData2 <- cyt$edgeData[,c(2,1,3)]
nodeattrib <- cyt$nodeData[,c(1,3)]
colnames(nodeattrib) <- c("nodeName", "Module")
colnames(edgeData1) <- c("Node1","Node2","Weight")
colnames(edgeData2) <- c("Node1","Node2","Weight")
edgeData <- rbind(edgeData1, edgeData2)
edgeData$Module1 <- nodeattrib[match(edgeData$Node1, nodeattrib$nodeName), 2]
edgeData$Module2 <- nodeattrib[match(edgeData$Node2, nodeattrib$nodeName), 2]
edgeData <- edgeData[edgeData$Module1==edgeData$Module2,c(1,3,4)]

nodeTotalWeight <- edgeData %>% group_by(Node1, Module1) %>% summarise(weight=sum(Weight))
nodeTotalWeight <- nodeTotalWeight[with(nodeTotalWeight, order(Module1, -weight)),]
nodeTotalWeightTop = nodeTotalWeight %>% group_by(Module1) %>% top_n(hug.top, weight) %>% data.frame()
write.table(nodeTotalWeightTop,'hug_gene_list.xls',sep='\t',quote=F)
lgroup <- unique(nodeTotalWeightTop$Module1)
lgroup <- lgroup[-grep('grey',lgroup)]

for (i in lgroup) {
nodes1 <- subset(nodeTotalWeightTop,Module1==i)
c1 <- which(edges[,1] %in% nodes1[,1])
edges1 <- edges[c1,]
c1 <- which(edges1[,2] %in% nodes1[,1])
edges1 <- edges1[c1,]

c1 <- which(nodes[,1] %in% nodes1[,1])
nodes1 <- nodes[c1,]
nodes1 <- subset(nodes1,group==i)

mtitle <- dim(nodes1)[1]
plot_network(edges=edges1,nodes=nodes1,group.by=NULL,weight='weight',coul = coul,layout=layout.sphere,pf=paste0("hug_genes_",i),main=mtitle)
}


#calculate correlation between gene modules and trait

if(!is.null(trait)) {
  traitData = trait
  sampleName = rownames(dataExpr)
  traitData = traitData[match(sampleName, rownames(traitData)), ]

if (corType=="pearsoon") {
  modTraitCor = cor(MEs_col, traitData, use = "p")
  modTraitP = corPvalueStudent(modTraitCor, nSamples)
} else {
  modTraitCorP = bicorAndPvalue(MEs_col, traitData, robustY=robustY)
  modTraitCor = modTraitCorP$bicor
  modTraitP   = modTraitCorP$p
}

textMatrix = paste(signif(modTraitCor, 2), "\n(", signif(modTraitP, 1), ")", sep = "")
dim(textMatrix) = dim(modTraitCor)
pdf('fig6.labeledHeatmap.pdf',18,10)
labeledHeatmap(Matrix = modTraitCor, xLabels = colnames(traitData),
               yLabels = colnames(MEs_col),
               cex.lab = 0.5,
               ySymbols = colnames(MEs_col), colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix, setStdMargins = FALSE,
               cex.text = 0.5, zlim = c(-1,1),
               main = paste("Module-trait relationships"))
dev.off()
}
##################################################
#save result
ob1 <- list(corType,dataExpr, MEs_col, traitData, nSamples, net)
save(corType,dataExpr, MEs_col, traitData, nSamples, net, file=paste0(label,'_result.RData'))
return(ob1)
}

主函數(shù)

seurat_wgcna <- function(obj,group.by=NULL,size=NULL,slot='data',assay='RNA',fun=mean,
                         type = "unsigned",corType = "pearson", pct.mad=0.75, label='test',trait=NULL,nThreads=10){
enableWGCNAThreads(nThreads=nThreads)
if (!is.null(size)) {
obj <- condense_cell(obj,group.by=group.by,size=size,slot=slot,assay=assay,fun=fun)
saveRDS(obj,'condense.rds')
}
trait <- obj@meta.data
run_wgcna(mat=obj@assays$RNA@counts,type = type, corType = corType, pct.mad=pct.mad, label=label,trait=trait)
}

總結與討論

因為單細胞數(shù)據(jù)一般細胞數(shù)太多片林,如果把每個細胞當成一個樣本會極其損耗計算資源端盆,因此構建假細胞是一個較好的辦法怀骤。還有很多細節(jié)沒有具體展開,之后有時間再補充焕妙。

?著作權歸作者所有,轉載或內容合作請聯(lián)系作者
  • 序言:七十年代末蒋伦,一起剝皮案震驚了整個濱河市,隨后出現(xiàn)的幾起案子焚鹊,更是在濱河造成了極大的恐慌痕届,老刑警劉巖,帶你破解...
    沈念sama閱讀 206,013評論 6 481
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件末患,死亡現(xiàn)場離奇詭異研叫,居然都是意外死亡,警方通過查閱死者的電腦和手機璧针,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 88,205評論 2 382
  • 文/潘曉璐 我一進店門嚷炉,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人探橱,你說我怎么就攤上這事申屹』嬷ぃ” “怎么了?”我有些...
    開封第一講書人閱讀 152,370評論 0 342
  • 文/不壞的土叔 我叫張陵哗讥,是天一觀的道長嚷那。 經(jīng)常有香客問我,道長杆煞,這世上最難降的妖魔是什么魏宽? 我笑而不...
    開封第一講書人閱讀 55,168評論 1 278
  • 正文 為了忘掉前任,我火速辦了婚禮决乎,結果婚禮上湖员,老公的妹妹穿的比我還像新娘。我一直安慰自己瑞驱,他們只是感情好娘摔,可當我...
    茶點故事閱讀 64,153評論 5 371
  • 文/花漫 我一把揭開白布。 她就那樣靜靜地躺著唤反,像睡著了一般凳寺。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上彤侍,一...
    開封第一講書人閱讀 48,954評論 1 283
  • 那天肠缨,我揣著相機與錄音,去河邊找鬼盏阶。 笑死晒奕,一個胖子當著我的面吹牛,可吹牛的內容都是我干的名斟。 我是一名探鬼主播脑慧,決...
    沈念sama閱讀 38,271評論 3 399
  • 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼砰盐!你這毒婦竟也來了闷袒?” 一聲冷哼從身側響起,我...
    開封第一講書人閱讀 36,916評論 0 259
  • 序言:老撾萬榮一對情侶失蹤岩梳,失蹤者是張志新(化名)和其女友劉穎囊骤,沒想到半個月后,有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體冀值,經(jīng)...
    沈念sama閱讀 43,382評論 1 300
  • 正文 獨居荒郊野嶺守林人離奇死亡也物,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內容為張勛視角 年9月15日...
    茶點故事閱讀 35,877評論 2 323
  • 正文 我和宋清朗相戀三年,在試婚紗的時候發(fā)現(xiàn)自己被綠了列疗。 大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片滑蚯。...
    茶點故事閱讀 37,989評論 1 333
  • 序言:一個原本活蹦亂跳的男人離奇死亡,死狀恐怖作彤,靈堂內的尸體忽然破棺而出膘魄,到底是詐尸還是另有隱情乌逐,我是刑警寧澤,帶...
    沈念sama閱讀 33,624評論 4 322
  • 正文 年R本政府宣布创葡,位于F島的核電站浙踢,受9級特大地震影響,放射性物質發(fā)生泄漏灿渴。R本人自食惡果不足惜洛波,卻給世界環(huán)境...
    茶點故事閱讀 39,209評論 3 307
  • 文/蒙蒙 一、第九天 我趴在偏房一處隱蔽的房頂上張望骚露。 院中可真熱鬧蹬挤,春花似錦、人聲如沸棘幸。這莊子的主人今日做“春日...
    開封第一講書人閱讀 30,199評論 0 19
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽误续。三九已至吨悍,卻和暖如春,著一層夾襖步出監(jiān)牢的瞬間蹋嵌,已是汗流浹背育瓜。 一陣腳步聲響...
    開封第一講書人閱讀 31,418評論 1 260
  • 我被黑心中介騙來泰國打工, 沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留栽烂,地道東北人躏仇。 一個月前我還...
    沈念sama閱讀 45,401評論 2 352
  • 正文 我出身青樓,卻偏偏與公主長得像腺办,于是被迫代替她去往敵國和親焰手。 傳聞我的和親對象是個殘疾皇子,可洞房花燭夜當晚...
    茶點故事閱讀 42,700評論 2 345

推薦閱讀更多精彩內容