### Create: Jianming Zeng
### Date: 2019-04-02 21:59:01
### Email: jmzeng1314@163.com
rm(list=ls())
options(stringsAsFactors = F)
Rdata_dir='../Rdata/'
Figure_dir='../figures/'
# 加載上一步從RTCGA.miRNASeq包里面提取miRNA表達矩陣和對應的樣本臨床信息款慨。
# 見 http://www.reibang.com/p/a5f687d2e7b7
load( file =
file.path(Rdata_dir,'TCGA-KIRC-miRNA-example.Rdata')
)
dim(expr)
dim(meta)
# 可以看到是 537個病人谬莹,但是有593個樣本附帽,每個樣本有 552個miRNA信息。
# 當然整胃,這個數據集可以下載原始測序數據進行重新比對屁使,可以拿到更多的miRNA信息
# 這里需要解析TCGA數據庫的ID規(guī)律奔则,來判斷樣本歸類問題。
group_list=ifelse(as.numeric(substr(colnames(expr),14,15)) < 10,'tumor','normal') ##經驗
table(group_list) #71個normal酬蹋,522個tumor
exprSet=na.omit(expr)
source('../functions.R')
DESeq2包范抓、edgeR包和limma包均可獲取DEGs,下面依次展示
### Firstly run DESeq2
if(T){
library(DESeq2)
(colData <- data.frame(row.names=colnames(exprSet),
group_list=group_list) )
dds <- DESeqDataSetFromMatrix(countData = exprSet,
colData = colData,
design = ~ group_list)
tmp_f=file.path(Rdata_dir,'TCGA-KIRC-miRNA-DESeq2-dds.Rdata')
if(!file.exists(tmp_f)){
dds <- DESeq(dds)
save(dds,file = tmp_f)
}
load(file = tmp_f)
res <- results(dds,
contrast=c("group_list","tumor","normal"))
resOrdered <- res[order(res$padj),]
head(resOrdered)
DEG =as.data.frame(resOrdered)
DESeq2_DEG = na.omit(DEG)
nrDEG=DESeq2_DEG[,c(2,6)]
colnames(nrDEG)=c('log2FoldChange','pvalue')
draw_h_v(exprSet,nrDEG,'DEseq2',group_list,1)
}
### Then run edgeR
###
### ---------------
if(T){
library(edgeR)
d <- DGEList(counts=exprSet,group=factor(group_list))
keep <- rowSums(cpm(d)>1) >= 2
table(keep)
d <- d[keep, , keep.lib.sizes=FALSE]
d$samples$lib.size <- colSums(d$counts)
d <- calcNormFactors(d)
d$samples
dge=d
design <- model.matrix(~0+factor(group_list))
rownames(design)<-colnames(dge)
colnames(design)<-levels(factor(group_list))
dge=d
dge <- estimateGLMCommonDisp(dge,design)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)
fit <- glmFit(dge, design)
# https://www.biostars.org/p/110861/
lrt <- glmLRT(fit, contrast=c(-1,1))
nrDEG=topTags(lrt, n=nrow(dge))
nrDEG=as.data.frame(nrDEG)
head(nrDEG)
edgeR_DEG =nrDEG
nrDEG=edgeR_DEG[,c(1,5)]
colnames(nrDEG)=c('log2FoldChange','pvalue')
draw_h_v(exprSet,nrDEG,'edgeR',group_list,1)
}
### Lastly run voom from limma
if(T){
suppressMessages(library(limma))
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
rownames(design)=colnames(exprSet)
design
dge <- DGEList(counts=exprSet)
dge <- calcNormFactors(dge)
logCPM <- cpm(dge, log=TRUE, prior.count=3)
v <- voom(dge,design,plot=TRUE, normalize="quantile")
fit <- lmFit(v, design)
group_list
cont.matrix=makeContrasts(contrasts=c('tumor-normal'),levels = design)
fit2=contrasts.fit(fit,cont.matrix)
fit2=eBayes(fit2)
tempOutput = topTable(fit2, coef='tumor-normal', n=Inf)
DEG_limma_voom = na.omit(tempOutput)
head(DEG_limma_voom)
nrDEG=DEG_limma_voom[,c(1,4)]
colnames(nrDEG)=c('log2FoldChange','pvalue')
draw_h_v(exprSet,nrDEG,'limma',group_list,1)
}
tmp_f=file.path(Rdata_dir,'TCGA-KIRC-miRNA-DEG_results.Rdata')
if(file.exists(tmp_f)){
save(DEG_limma_voom,DESeq2_DEG,edgeR_DEG, file = tmp_f)
}else{
load(file = tmp_f)
}
nrDEG1=DEG_limma_voom[,c(1,4)]
colnames(nrDEG1)=c('log2FoldChange','pvalue')
nrDEG2=edgeR_DEG[,c(1,5)]
colnames(nrDEG2)=c('log2FoldChange','pvalue')
nrDEG3=DESeq2_DEG[,c(2,6)]
colnames(nrDEG3)=c('log2FoldChange','pvalue')
mi=unique(c(rownames(nrDEG1),rownames(nrDEG1),rownames(nrDEG1)))
lf=data.frame(lf1=nrDEG1[mi,1],
lf2=nrDEG2[mi,1],
lf3=nrDEG3[mi,1])
cor(na.omit(lf))
[圖片上傳中...(image.png-f55ef-1557501086273-0)]
# 可以看到采取不同R包犁柜,會有不同的歸一化算法,這樣算到的logFC會稍微有差異扒腕。而且up&down基因數量也有差別
參考來源:生信技能樹
友情鏈接:
課程分享
生信技能樹全球公益巡講
(https://mp.weixin.qq.com/s/E9ykuIbc-2Ja9HOY0bn_6g)
B站公益74小時生信工程師教學視頻合輯
(https://mp.weixin.qq.com/s/IyFK7l_WBAiUgqQi8O7Hxw)
招學徒:
(https://mp.weixin.qq.com/s/KgbilzXnFjbKKunuw7NVfw)
歡迎關注公眾號:青島生信菜鳥團