# Parallel computing setup ----
library(parallel)
# Number of CPU cores detected on this machine (printed for reference).
cl.cores <- detectCores()
cl.cores
# Start a worker cluster with at most 10 workers, but never more than the
# number of detected cores. detectCores() may return NA; na.rm = TRUE then
# leaves the cap at 10, and max(1L, ...) guarantees at least one worker.
n_workers <- max(1L, min(10L, cl.cores, na.rm = TRUE))
cl <- makeCluster(n_workers)
# NOTE(review): nothing in the visible part of this script uses `cl`; whoever
# does use it should call stopCluster(cl) afterwards so the worker processes
# are released.
# Differential expression analysis - mRNA ####
library(tidyverse)
library("limma")
library("edgeR")
# Load the saved workspace; it is expected to provide `expermRNA`
# (presumably a genes x samples count table -- TODO confirm).
load("diffmRNA//mRNA.Rdata")
expermRNA[1:3, 1:3]
# Assign each sample to a group from its column name: characters 14-15 equal
# to "01" mean "tumor", anything else is treated as "control".
# NOTE(review): looks like TCGA barcode sample-type codes -- confirm.
sample_code <- str_sub(colnames(expermRNA), 14, 15)
group <- ifelse(sample_code == "01", "tumor", "control")
table(group)
# Convert the character labels to a factor (levels sort alphabetically:
# "control", then "tumor").
group_list <- factor(group)
# The pairwise test further down needs both groups to be present.
stopifnot(nlevels(group_list) == 2L)
# Differential analysis: design matrix and low-expression filter ----
# `~ 0 + group_list` drops the intercept, so the design matrix gets one
# indicator column per group level instead of a baseline plus contrast.
design <- model.matrix(~ 0 + group_list)
rownames(design) <- colnames(expermRNA)
colnames(design) <- levels(group_list)
# Filter out lowly expressed genes: keep rows whose mean across samples
# exceeds 1. drop = FALSE keeps the table two-dimensional even if only a
# single gene passes the filter.
expermRNA <- expermRNA[rowMeans(expermRNA) > 1, , drop = FALSE]
# edgeR ----
# Build the DGEList object from the filtered counts and the group factor.
y <- DGEList(counts = expermRNA, group = group_list)
# Normalisation factors (TMM is the default method of calcNormFactors).
y <- calcNormFactors(y)
# Estimate the common dispersion, then the tagwise (per-gene) dispersions.
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)
# Exact test between the two group levels. The first level in `pair` is the
# baseline, so logFC is reported for the second level relative to the first.
et <- exactTest(y, pair = levels(group_list))
# Classify genes as up / down / not significant using the thresholds
# adjusted p-value < 0.01 and |log2 fold change| > 2.
gene1 <- decideTestsDGE(et, p.value = 0.01, lfc = 2)
# Show how many genes fall into each class.
summary(gene1)
# Extract the complete differential-analysis result table ----
# Preview of the top-ranked genes.
topTags(et)
# n = Inf returns every tested gene instead of relying on a hard-coded cap.
ordered_tags <- topTags(et, n = Inf)
allDEG <- ordered_tags$table
# Discard rows without an FDR value.
allDEG <- allDEG[!is.na(allDEG$FDR), ]
# Save the full, unfiltered result table (tab-separated); the significant
# subset is selected from it in a later step.
write.table(allDEG, 'diffmRNA//allDEG.txt', sep = '\t')
# Extract the filtered (significant) results ----
# Gene IDs live in the row names, which dplyr verbs do not preserve, so they
# are moved into a temporary column for filtering/sorting and restored after.
# Kept genes: PValue < 0.01 and |logFC| > 2, sorted by ascending logFC.
# NOTE(review): this filters on the raw PValue, whereas decideTestsDGE() and
# the volcano plot in this script use adjusted values / FDR -- confirm which
# is intended.
diff_signif <- allDEG %>%
  rownames_to_column("rownames") %>%
  filter(PValue < 0.01, abs(logFC) > 2) %>%
  arrange(logFC) %>%
  column_to_rownames("rownames")
# Written with write.csv, so the file is comma-separated despite the .txt
# extension.
write.csv(diff_signif, file = 'diffmRNA//DIFmRNA.txt')
# Label each significant gene as up- or down-regulated ----
# rownames_to_column() turns the row names into a column named "mRNA";
# mutate() adds a "Regulation" column ("DOWN" when logFC < 0, otherwise "UP");
# select() keeps only those two columns.
sigmRNA <- diff_signif %>%
  rownames_to_column("mRNA") %>%
  mutate(Regulation = ifelse(logFC < 0, "DOWN", "UP")) %>%
  select(mRNA, Regulation)
# Written with write.csv, so the file is comma-separated despite the .txt
# extension.
write.csv(sigmRNA, file = 'diffmRNA//sigmRNA.txt')
# Volcano plot ----
# x axis: -log10(FDR); y axis: logFC. All genes are drawn in gray, then
# significantly up-regulated genes are overdrawn in red and down-regulated
# genes in sky blue.
allDiff <- ordered_tags$table
pdf("diffmRNA//vol.pdf", 12, 12)
# Keep only rows with a usable, non-zero FDR: -log10(0) is Inf and would make
# the x-axis limit non-finite. A logical mask is used here; negating a
# logical vector (`-(x == 0)`) produces the indices 0 / -1 and does not
# remove the intended rows.
allDiff2 <- allDiff[!is.na(allDiff$FDR) & allDiff$FDR != 0, ]
xMax <- max(-log10(allDiff2$FDR)) + 1
yMax <- 12
plot(
  -log10(allDiff2$FDR),
  allDiff2$logFC,
  xlab = "-log10(FDR)",
  ylab = "logFC",
  main = "Volcano",
  xlim = c(0, xMax),
  ylim = c(-yMax, yMax),
  yaxs = "i",
  pch = 20,
  col = "gray",
  cex = 0.4
)
# Up-regulated: FDR < 0.01 and logFC > 2. Rows with FDR == 0 would sit at
# x = Inf and cannot be drawn, so subsetting the cleaned table gives the same
# picture.
diffSub <- allDiff2[allDiff2$FDR < 0.01 & allDiff2$logFC > 2, ]
points(-log10(diffSub$FDR), diffSub$logFC, pch = 20, col = "red", cex = 0.4)
# Down-regulated: FDR < 0.01 and logFC < -2.
diffSub <- allDiff2[allDiff2$FDR < 0.01 & allDiff2$logFC < (-2), ]
points(-log10(diffSub$FDR), diffSub$logFC, pch = 20, col = "skyblue", cex = 0.4)
# Dashed reference line at logFC = 0.
abline(h = 0, lty = 2, lwd = 3)
dev.off()
# Heatmap of the significant genes ----
# Pseudo-counts stored on the DGEList `y` (presumably filled in by the
# dispersion-estimation step -- TODO confirm).
newData <- y$pseudo.counts
# Restrict to the significant genes; drop = FALSE keeps a matrix even when
# only one gene is selected.
heatmapData <- newData[rownames(diff_signif), , drop = FALSE]
# log10 transform with a small offset so zero counts do not become -Inf.
hmExp <- log10(heatmapData + 0.001)
library('gplots')
hmMat <- as.matrix(hmExp)
pdf(file = "diffmRNA//heatmap.pdf", 12, 15)
# Enlarge the outer margins (bottom, left, top, right).
par(oma = c(10, 3, 3, 7))
# This call can take a long time to run on large matrices.
heatmap.2(hmMat, col = 'bluered', trace = "none")
dev.off()