方法選擇
做差異分析需要的數據:表達矩陣和分組信息
當表達矩陣為counts時崔泵,優(yōu)先使用edgeR素标;當只有TPM時妆档,TPM的分布接近芯片數據,使用limma;FPKM做差異分析呛哟,不推薦叠荠!
limma
使用limma對TPM表達矩陣進行差異分析并繪制火山圖:
TPM <- readRDS('./data/TPM.RDS')
meta_data <- readRDS('./data/meta_data.RDS')
exp <- TPM
exp <- exp[apply(exp,1,mean)>0,]
#impute missing expression data
mat <- impute.knn(as.matrix(exp))
rt <- mat$data
rt <- avereps(rt)
qx <- as.numeric(quantile(rt, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
LogC <- (qx[5] > 100) || (qx[6]-qx[1] > 50 && qx[2] > 0)
if (LogC) { rt[which(rt <= 0)] <- NaN
rt <- log2(rt) }
feature <- 'qixuxinlie2'
stage=1
test_idx <- which(meta_data[[feature]] == stage)
control_idx <- which(meta_data[[feature]] != stage)
test_num <- length(test_idx) #實驗組樣本數量
control_num <- length(control_idx) #對照組樣本數量
class <- c(rep("dis",test_num),rep("con",control_num))
design <- model.matrix(~0+factor(class))
colnames(design) <- c("con","dis")
fit <- lmFit(rt[,c(test_idx,control_idx)],design)
cont.matrix<-makeContrasts(dis-con,levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
allDiff <- topTable(fit2,adjust='BH',number=200000)
#volcano
allDiff$threshold <- 'non'
allDiff$threshold[allDiff$adj.P.Val < 0.05 & allDiff$logFC > 1 ] = "up"
allDiff$threshold[allDiff$adj.P.Val < 0.05 & allDiff$logFC < -1 ] = "down"
allDiff$threshold <- factor(allDiff$threshold,levels = c('up','down','non'))
allDiff$gene <- rownames(allDiff)
allDiff$gene[is.na(allDiff$threshold)] <- ''
diffSig <- allDiff[which(abs(allDiff$logFC) > 1 & allDiff$adj.P.Val < 0.05), ]
ggplot(allDiff,aes(x=logFC,y=-log10(adj.P.Val),colour = threshold))+
xlab("log2 Fold Change")+
ylab("-log10Pvalue") +
geom_point(size=1,alpha=0.6)+
# geom_text(aes(label = gene))+
scale_color_manual(breaks = c('up','down','non'),values = c("#BC3C28","#0072B5","grey")) +
geom_hline(aes(yintercept=-log10(0.05)),colour="grey",size=1 ,linetype=2) + #增加水平間隔線
geom_vline(aes(xintercept=0), colour="grey",size=1 ,linetype=2) + #增加垂直間隔線
theme_few() +
theme(legend.title = element_blank()) + #去掉網格背景和圖注標簽
ggtitle(paste0('Volcano Plot of ',feature,'=',stage,'\n',
length(which(allDiff == 'up')),' genes up','\n',
length(which(allDiff == 'down')),' genes down'))