昨天上完課沒(méi)有整理完押桃,今天繼續(xù)
9.之前用的參考基因組不是文章中所用的需要進(jìn)一步優(yōu)化
進(jìn)行基因組的坐標(biāo)轉(zhuǎn)換,這一步真的有點(diǎn)難,轉(zhuǎn)換的方式比較多可以參考生信菜鳥(niǎo)團(tuán)的博文https://cloud.tencent.com/developer/article/1054520
暫時(shí)先用曾老師代碼,嘗試?yán)斫猓竺嬖偕钊雽W(xué)習(xí)
library(rtracklayer)
path='dm3ToDm6.over.chain'
ch = import.chain(path)
ch
require(ChIPseeker)
require(TxDb.Dmelanogaster.UCSC.dm3.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
require(clusterProfiler)
library(ChIPpeakAnno)
peak_dm3 <- readPeakFile('Pho_WT.narrowPeak.bed')
seqlevelsStyle(peak_dm3) = "UCSC" # necessary
peak_dm6 = liftOver(peak_dm3, ch)
class(peak_dm6)
peak_dm3
peak_dm6
他在演示的時(shí)候這個(gè)R腳本運(yùn)行較慢茵乱,最后用shell腳本完成相關(guān)內(nèi)容,還需要在消化一下
10. 再次重畫(huà)韋恩圖根據(jù)原文查看分別在H3K27me3 domain
內(nèi)外的peaks
rm(list=ls())
require(ChIPseeker)
require(TxDb.Dmelanogaster.UCSC.dm3.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
require(clusterProfiler)
(bedFiles=list.files(pattern = '*dm6'))
bedFiles=bedFiles[-2]
library(ChIPpeakAnno)
fs=lapply(bedFiles,function(x){
peak <- readPeakFile( x )
keepChr= !grepl('Het',seqlevels(peak))
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
peak
})
library(stringr)
str_split(bedFiles,'_',simplify = T)[,1]
H3K27 <- readPeakFile('H3K27_peaks.narrowPeak')
keepChr= seqlevels(H3K27) %in% c('2L','2R','3L','3R','4','X','Y','M')
seqlevels(H3K27, pruning.mode="coarse") <- seqlevels(H3K27)[keepChr]
seqlevels(H3K27, pruning.mode="coarse") <- paste0('chr',seqlevels(H3K27))
seqlevels(H3K27)
tmp1=findOverlapsOfPeaks(fs[[1]], H3K27)
tmp2=findOverlapsOfPeaks(fs[[2]], H3K27)
tmp3=findOverlapsOfPeaks(fs[[3]], H3K27)
ol <- findOverlapsOfPeaks(tmp1$peaklist$`fs..1..///H3K27`,
tmp2$peaklist$`fs..2..///H3K27`,
tmp3$peaklist$`fs..3..///H3K27`)
png('3_factors_overlapVenn_in_domain.png')
makeVennDiagram(ol,
NameOfPeaks=str_split(bedFiles,'_',simplify = T)[,1],
TxDb=txdb)
dev.off()
lapply(1:7,function(i){
gr=ol$peaklist[[i]]
dat=as.data.frame(gr)[,1:3]
dat[,1]=gsub('chr','',dat[,1])
write.table(dat,sep = '\t',
col.names = F,file = paste0('G',i,'_in.bed')
,quote = F,row.names = F)
})
ol <- findOverlapsOfPeaks(tmp1$peaklist$fs..1..,
tmp2$peaklist$fs..2..,
tmp3$peaklist$fs..3..)
png('3_factors_overlapVenn_not_domain.png')
makeVennDiagram(ol,
NameOfPeaks=str_split(bedFiles,'_',simplify = T)[,1],
TxDb=txdb)
dev.off()
lapply(1:7,function(i){
gr=ol$peaklist[[i]]
dat=as.data.frame(gr)[,1:3]
dat[,1]=gsub('chr','',dat[,1])
write.table(dat,sep = '\t',
col.names = F,file = paste0('G',i,'_out.bed')
,quote = F,row.names = F)
})
生成2個(gè)新的韋恩圖孟岛,并且得到overlap后的peaks數(shù)據(jù)集的bed文件
這一步的完成需要上一步得到的轉(zhuǎn)換參考基因組版本后的bed文件瓶竭,這個(gè)內(nèi)容需要再練習(xí)一下
12. 對(duì)之前的差異分析提取感興趣的基因進(jìn)行注釋
load(file = 'deg_output.Rdata')
library(ggpubr)
colnames(DEG_PhoKO)
DEG_PhoKO$log=log(DEG_PhoKO$baseMean+1)
DEG_PhoKO$change=ifelse(DEG_PhoKO$padj>0.05,'stable',
ifelse(DEG_PhoKO$log2FoldChange > 0,'up','down'))
table(DEG_PhoKO$change)
ggscatter(DEG_PhoKO,x="log" ,y="log2FoldChange",color = 'change')
DEG_SppsKO$log=log(DEG_SppsKO$baseMean+1)
DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05,'stable',
ifelse(DEG_SppsKO$log2FoldChange > 0,'up','down'))
table(DEG_SppsKO$change)
ggscatter(DEG_SppsKO,x="log" ,y="log2FoldChange",color = 'change')
library(UpSetR)
SppsKO_up=rownames(DEG_SppsKO[DEG_SppsKO$change=='up',])
SppsKO_down=rownames(DEG_SppsKO[DEG_SppsKO$change=='down',])
PhoKO_up=rownames(DEG_PhoKO[DEG_PhoKO$change=='up',])
PhoKO_down=rownames(DEG_PhoKO[DEG_PhoKO$change=='down',])
allG=unique(c(SppsKO_up,SppsKO_down,PhoKO_up,PhoKO_down))
df=data.frame(allG=allG,
SppsKO_up=as.numeric(allG %in% SppsKO_up),
SppsKO_down=as.numeric(allG %in% SppsKO_down),
PhoKO_up=as.numeric(allG %in% PhoKO_up),
PhoKO_down=as.numeric(allG %in% PhoKO_down))
upset(df)
colnames(df)
# "SppsKO_up" "SppsKO_down" "PhoKO_up" "PhoKO_down"
SppsKO_up_uniq=df[df[,2]==1 & df[,3]==0 & df[,4]==0 & df[,5]==0 ,1]
SppsKO_down_uniq=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==0 ,1]
PhoKO_up_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
PhoKO_down_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==0 & df[,5]==1 ,1]
SppsKO_up_PhoKO_up=df[df[,2]==1 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
SppsKO_down_PhoKO_down=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==1 ,1]
library(org.Dm.eg.db)
t1=toTable(org.Dm.egCHRLOC)
t2=toTable(org.Dm.egCHRLOCEND)
t3=toTable(org.Dm.egSYMBOL)
# 作業(yè)
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
gs=as.data.frame(genes(txdb))
library(org.Dm.eg.db)
t100=toTable(org.Dm.egFLYBASE)
t101=toTable(org.Dm.egSYMBOL)
t200=merge(t100,t101,by='gene_id')
colnames(gs)[6]="flybase_id"
t300=merge(t200,gs,by="flybase_id")
SppsKO_up_uniq
SppsKO_up_bed=t300[match(SppsKO_up_uniq[SppsKO_up_uniq %in% t300$symbol ],
t300$symbol),4:6]
SppsKO_down_uniq
SppsKO_down_bed=t300[match(SppsKO_down_uniq[SppsKO_down_uniq %in% t300$symbol ],
t300$symbol),4:6]
這個(gè)作業(yè)是參考上面的代碼完成peaks查找對(duì)應(yīng)基因進(jìn)行的注釋并做下有分析
13. 在上一步得到的基因集加載進(jìn)來(lái)進(jìn)行kegg
注釋
rm(list = ls())
load(file = '6-genesets.Rdata')
library(ggplot2)
library(clusterProfiler)
library(org.Dm.eg.db)
kegg_anno <- function(gs,pro){
#pro='SppsKO_up_uniq'
#gs=SppsKO_up_uniq
gene_up= bitr(gs, fromType = "SYMBOL",
toType = c( "ENTREZID"),
OrgDb = org.Dm.eg.db)[,2]
kk.up <- enrichKEGG(gene = gene_up,
organism = 'dme',
keyType = 'ncbi-geneid',
pvalueCutoff = 0.9,
qvalueCutoff =0.9)
head(kk.up)[,1:6]
kk=kk.up
dotplot(kk)
ggsave(paste0(pro,'_kk.png'))
kk=DOSE::setReadable(kk, OrgDb='org.Dm.eg.db',keyType='ENTREZID')
write.csv(kk@result,paste0(pro,'_kk.up.csv'))
}
l=list(SppsKO_up_uniq,SppsKO_down_uniq,
PhoKO_up_uniq, PhoKO_down_uniq,
SppsKO_up_PhoKO_up,SppsKO_down_PhoKO_down)
names(l)=c('SppsKO_up_uniq','SppsKO_down_uniq',
'PhoKO_up_uniq', 'PhoKO_down_uniq',
'SppsKO_up_PhoKO_up','SppsKO_down_PhoKO_down')
lapply(1:length(l), function(i){
kegg_anno(l[[i]],names(l)[i])
})
library(ggplot2)
library(clusterProfiler)
library(org.Dm.eg.db)
gcSample=lapply(l, function(x){
return( bitr(x, fromType = "SYMBOL",
toType = c( "ENTREZID"),
OrgDb = org.Dm.eg.db)[,2] )
})
xx <- compareCluster(gcSample, fun="enrichKEGG",
keyType = 'ncbi-geneid',
organism="dme", pvalueCutoff=0.05)
dotplot(xx)
注:這篇文章可以挖掘的地方進(jìn)行peaks的通路的注釋?zhuān)部梢愿鶕?jù)peaks對(duì)應(yīng)的promoter區(qū)對(duì)應(yīng)的調(diào)控基因進(jìn)行注釋和下游分析。
本次課程內(nèi)容很豐富蚀苛,關(guān)于相應(yīng)分析軟件的使用不熟練在验,需要進(jìn)一步練習(xí)。今天下載了文章原文堵未,后面就開(kāi)始讀文獻(xiàn),好好理解一下周末課程的內(nèi)容爭(zhēng)取消化掉盏触。