參考:
library(edgeR)
#載入row count文件
raw_count<-read.csv(file.choose(),header = T,sep = ",")
head(raw_count)
#構(gòu)建DGEList對象
group<-factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)),
levels = c("ASD","Healthy"))
genelist<-DGEList(counts = as.matrix(raw_count), group = group)
#過濾low counts數(shù)據(jù):利用CPM標(biāo)準(zhǔn)化
keep<-rowSums(cpm(genelist) > 0.5 ) >=2
table(keep)
genelist.filted<-genelist[keep, ,keep.lib.sizes=FALSE]
#使用TMM算法對DGEList標(biāo)準(zhǔn)化
genelist.norm<-calcNormFactors(genelist.filted)
#實(shí)驗(yàn)設(shè)計(jì)矩陣
design<-model.matrix(~0+group)
colnames(design)<-levels(group)
design
#估計(jì)離散值(Dispersion)
genelist.Disp<-estimateDisp(genelist.norm, design, robust = TRUE)
plotBCV(genelist.Disp)
#quasi-likelihood (QL)擬合NB模型:用于解釋生物學(xué)和技術(shù)性導(dǎo)致的基因特異性變異
fit<-glmQLFit(genelist.Disp, design, robust=TRUE)
head(fit$coefficients)
#差異表達(dá)檢驗(yàn):構(gòu)建比較矩陣
cntr.vs.KD<-makeContrasts(control-ASD, levels = design)
res<-glmQLFTest(fit, contrast = cntr.vs.KD)
ig.edger<-res$table[p.adjust(res$table$PValue, method = "BH") < 0.05, ]
#提取顯著性差異的基因
topTags(res,n=10)
is.de <- decideTestsDGE(res)
summary(is.de)
plotMD(res, status=is.de, values=c(1,-1), col=c("red","blue"),
legend="topright")