下載GSE
rm(list = ls())
options(stringsAsFactors = F)
library(GEOquery)
library(limma)
library(dplyr)
gset <- getGEO('GSE112790',destdir = '.',AnnotGPL = T,getGPL = T)
exprSet <- exprs(gset[[1]])
dim(exprSet)
pData <- pData(gset[[1]])
group_list=as.character(pData$geo_accession)
過濾表達矩陣
library(hgu133plus2.db)
ids <- toTable(hgu133plus2SYMBOL)
length(unique(ids$symbol))
exprSet <- exprSet[rownames(exprSet) %in% ids$probe_id, ]
刪除表達量低的探針挠阁,取最大表達量
ids <- ids[match(rownames(exprSet),ids$probe_id),]
head(ids)
exprSet[1:5,1:5]
tmp <- by(exprSet,ids$symbol,function(x) rownames(x)[which.max(rowMeans(x))] )
probes <- as.character(tmp)
exprSet <- exprSet[rownames(exprSet) %in% probes,]
dim(exprSet)
head(exprSet)
探針名轉(zhuǎn)為基因名
rownames(exprSet) <- ids[match(rownames(exprSet),ids$probe_id),2]
exprSet[1:5,1:5]
write.csv(exprSet,"GSE112790_matrix.csv")
制作分組矩陣
sample <- pData$geo_accession
tissue <- rep(c("normal","tumor"),c(15,183))
design_df <- data.frame(sample,tissue)
制作差異比較矩陣
cont.matrix <- makeContrasts(
vs = TStumor-TSnormal,levels = design)
lmFit
TS <- tissue
TS <- factor(TS, levels = unique(TS))
design <- model.matrix(~0+TS)
fit <- lmFit(exprSet, design)
eBayes
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
topTable
tempOutput = topTable(fit2, coef=1, n=Inf)
nrDEG = na.omit(tempOutput)
head(nrDEG)
write.csv(nrDEG,"limma_notrend.results.csv",quote = F)
results <- decideTests(fit2)
vennDiagram(results)