原貼地址:http://www.reibang.com/p/3bfb21d24b74
?????????????????? http://www.reibang.com/p/4910d7cec5c8
1.讀取自己表達(dá)矩陣
# 構(gòu)建自己的表達(dá)矩陣并讀取
> control1 <- read.table("~/disk2/data/rna-seq/matrix/SRR3589959.count", sep="\t", col.names = c("gene_id","control1"))
> control2 <- read.table("~/disk2/data/rna-seq/matrix/SRR3589961.count", sep="\t", col.names = c("gene_id","control2"))
> rep1 <- read.table("~/disk2/data/rna-seq/matrix/SRR3589960.count", sep="\t", col.names = c("gene_id","akap951"))
> rep2 <- read.table("~/disk2/data/rna-seq/matrix/SRR3589962.count", sep="\t",col.names = c("gene_id","akap952"))
> raw_count <- merge(merge(control1, control2,by="gene_id"),merge(rep1,rep2, by="gene_id"))
> raw_count_filt <- raw_count[-48823:-48825,]
> raw_count_filter <- raw_count_filt[-1:-2,]
> ENSEMBL <- gsub("\\.\\d*","", raw_count_filter$gene_id)
> row.names(raw_count_filter) <- ENSEMBL
> raw_count_filter <- raw_count_filter[ ,-1]
這里有兩個(gè)問題戏锹,第一個(gè)就是刪除行列税朴,自己根據(jù)情況選擇吐限,理論上把名字差別太大的刪了。
矩陣數(shù)據(jù)結(jié)構(gòu)
2.構(gòu)建dds對(duì)象
# 這一步很關(guān)鍵,要明白condition這里是因子,不是樣本名稱;小鼠數(shù)據(jù)有對(duì)照組和處理組伐庭,各兩個(gè)重復(fù)
> condition <- factor(c(rep("control",2),rep("akap95",2)), levels = c("control","akap95"))# 獲取count數(shù)據(jù)
> countData <- raw_count_filter[,1:4]
> colData <- data.frame(row.names=colnames(raw_count_filter), condition)
> dds <- DESeqDataSetFromMatrix(countData, colData, design= ~ condition)# 查看一下dds的內(nèi)容
> head(dds)
adds概要信息
3.DESeq標(biāo)準(zhǔn)化dds
# normalize 數(shù)據(jù)
> dds2 <- DESeq(dds)# 查看結(jié)果的名稱粉渠,本次實(shí)驗(yàn)中是"Intercept","condition_akap95_vs_control"
> resultsNames(dds2)# 將結(jié)果用results()函數(shù)來獲取圾另,賦值給res變量
res <- results(dds2)# summary一下霸株,看一下結(jié)果的概要信息summary(res)
result結(jié)果可以看到一些基本的信息,p值默認(rèn)小于0.1集乔,上調(diào)基因有625個(gè)去件,下調(diào)基因有445個(gè)。
res的概要信息
4.提取差異分析結(jié)果
# 獲取padj(p值經(jīng)過多重校驗(yàn)校正后的值)小于0.05扰路,表達(dá)倍數(shù)取以2為對(duì)數(shù)后大于1或者小于-1的差異表達(dá)基因尤溜。
> table(res$padj<0.05)
> res <- res[order(res$padj),]
> diff_gene_deseq2 <-subset(res,padj <0.05& (log2FoldChange >1| log2FoldChange <-1))
> diff_gene_deseq2 <- row.names(diff_gene_deseq2)
resdata<-merge(as.data.frame(res),as.data.frame(counts(dds2,normalize=TRUE)),by="row.names",sort=FALSE)
# 得到csv格式的差異表達(dá)分析結(jié)果
> write.csv(resdata,file="control_vs_akap95.cvs",row.names = F)
把名字變成上圖,gsub那個(gè)不會(huì)整汗唱,那就導(dǎo)出csv宫莱,然后手工刪除在導(dǎo)入進(jìn)行后續(xù)分析
后續(xù)就可以轉(zhuǎn)換基因名 各種折騰了
require(DOSE)
require(clusterProfiler)
ekk <- enrichKEGG(gene=gene,organism="human",pvalueCutoff=0.01)
ego <- enrichGO(gene=gene,OrgDb="org.Hs.eg.db",ont="CC",pvalueCutoff=0.01,readable=TRUE)
write.csv(summary(ekk),"KEGG-enrich.csv",row.names?=F)
write.csv(summary(ego),"GO-enrich.csv",row.names?=F)
ego <- enrichGO(gene????????? = gene,
universe????? = names(geneList),
OrgDb???????? = org.Hs.eg.db,
ont?????????? = "CC",
pAdjustMethod = "BH",
pvalueCutoff? = 0.01,
qvalueCutoff? = 0.05)
ego2 <- enrichGO(gene???????? = gene.df$ENSEMBL,
OrgDb???????? = org.Hs.eg.db,
keytype?????? = 'ENSEMBL',
ont?????????? = "CC",
pAdjustMethod = "BH",
pvalueCutoff? = 0.01,
qvalueCutoff? = 0.05)
ego3 <- enrichGO(gene???????? = gene.df$SYMBOL,
OrgDb???????? = “org.Hs.eg.db”,
keytype?????? = 'SYMBOL',
ont?????????? = "CC",
pAdjustMethod = "BH",
pvalueCutoff? = 0.01,
qvalueCutoff? = 0.05)
ego <- enrichGO(gene????????? = DEG$,
OrgDb???????? = "org.Hs.eg.db",
ont?????????? = "CC",
pAdjustMethod = "BH",
pvalueCutoff? = 0.01,
qvalueCutoff? = 0.05)
ego <- enrichGO(gene=gene,OrgDb="org.Hs.eg.db",ont="CC",pvalueCutoff=0.01,readable=TRUE)
DO<-enrichDO(gene=DEG$Gene.ID, ont = "DO", pvalueCutoff = 0.01, pAdjustMethod = "BH",qvalueCutoff = 0.05)
eg=bitr(geneID = PP$geneID, "ENTREZID", "SYMBOL", "org.Hs.eg.db")
df.id<-bitr(df$SYMBOL, fromType ="SYMBOL", toType ="ENTREZID",OrgDb ="org.Hs.eg.db")
easy.df<-merge(df,df.id,by="SYMBOL",all=F)
sortdf<-easy.df[order(easy.df$foldChange, decreasing =T),]
gene.expr = sortdf$foldChange
names(gene.expr) <- sortdf$ENTREZID