rm(list=ls())
setwd('/Users/zhangjuxiang/Desktop/R time seq/')
#Bioconductor 安裝 edgeR
#install.packages('BiocManager') #需要首先安裝 BiocManager,如果尚未安裝請先執(zhí)行該步
BiocManager::install('edgeR',force=T)
#讀取基因表達(dá)矩陣
targets <- read.csv('rawdata.csv')
as.matrix(targets)
rownames(targets) <- targets[,1]
targets <- targets[,-1]
#指定分組柿扣,注意要保證表達(dá)矩陣中的樣本順序和這里的分組順序是一一對應(yīng)的
#對照組在前页响,處理組在后
group <- rep(c('BC', 'EC'),c(121,192))
library(edgeR)
#數(shù)據(jù)預(yù)處理
#(1)構(gòu)建 DGEList 對象
dgelist <- DGEList(counts = targets, group = group)
#(2)過濾 low count 數(shù)據(jù),例如 CPM 標(biāo)準(zhǔn)化(推薦)
keep <- rowSums(cpm(dgelist) > 1 ) >= 2
dgelist <- dgelist[keep,, keep.lib.sizes = FALSE]
#(3)標(biāo)準(zhǔn)化鸟缕,以 TMM 標(biāo)準(zhǔn)化為例
dgelist_norm <- calcNormFactors(dgelist, method = 'TMM')
#差異表達(dá)基因分析
#首先根據(jù)分組信息構(gòu)建試驗設(shè)計矩陣晶框,分組信息中一定要是對照組在前排抬,處理組在后
design <- model.matrix(~group)
#(1)估算基因表達(dá)值的離散度
dge <- estimateDisp(dgelist_norm, design, robust = TRUE)
#(2)模型擬合,edgeR 提供了多種擬合算法
#負(fù)二項廣義對數(shù)線性模型
fit <- glmFit(dge, design, robust = TRUE)
lrt <- topTags(glmLRT(fit), n = nrow(dgelist$counts))
write.table(lrt, 'control_treat.glmLRT.txt', sep = '\t', col.names = NA, quote = FALSE)
#擬似然負(fù)二項廣義對數(shù)線性模型
fit <- glmQLFit(dge, design, robust = TRUE)
lrt <- topTags(glmQLFTest(fit), n = nrow(dgelist$counts))
write.table(lrt, 'control_treat.glmQLFit.txt', sep = '\t', col.names = NA, quote = FALSE)
##篩選差異表達(dá)基因
#讀取上述輸出的差異倍數(shù)計算結(jié)果
gene_diff <- read.delim('control_treat.glmLRT.txt', row.names = 1, sep = '\t', check.names = FALSE)
#首先對表格排個序授段,按 FDR 值升序排序蹲蒲,相同 FDR 值下繼續(xù)按 log2FC 降序排序
gene_diff <- gene_diff[order(gene_diff$FDR, gene_diff$logFC, decreasing = c(FALSE, TRUE)), ]
#log2FC≥1 & FDR<0.01 標(biāo)識 up,代表顯著上調(diào)的基因
#log2FC≤-1 & FDR<0.01 標(biāo)識 down侵贵,代表顯著下調(diào)的基因
#其余標(biāo)識 none届搁,代表非差異的基因
gene_diff[which(gene_diff$logFC >= 1 & gene_diff$FDR < 0.05),'sig'] <- 'up'
gene_diff[which(gene_diff$logFC <= -1 & gene_diff$FDR < 0.05),'sig'] <- 'down'
gene_diff[which(abs(gene_diff$logFC) <= 1 | gene_diff$FDR >= 0.05),'sig'] <- 'none'
#輸出選擇的差異基因總表
gene_diff_select <- subset(gene_diff, sig %in% c('up', 'down'))
write.table(gene_diff_select, file = 'control_treat.glmQLFit.select.txt', sep = '\t', col.names = NA, quote = FALSE)
#根據(jù) up 和 down 分開輸出
gene_diff_up <- subset(gene_diff, sig == 'up')
gene_diff_down <- subset(gene_diff, sig == 'down')
write.table(gene_diff_up, file = 'control_treat.glmQLFit.up.txt', sep = '\t', col.names = NA, quote = FALSE)
write.table(gene_diff_down, file = 'control_treat.glmQLFit.down.txt', sep = '\t', col.names = NA, quote = FALSE)
install.packages('pheatmap')
library(pheatmap)
{
tmp = gene_diff_select[gene_diff_select$PValue < 0.05,]
#差異結(jié)果需要先根據(jù)p值挑選
nrDEG_Z = tmp[ order( tmp$logFC ), ]
nrDEG_F = tmp[ order( -tmp$logFC ), ]
choose_gene = c( rownames( nrDEG_Z )[1:100], rownames( nrDEG_F )[1:100] )
choose_matrix = targets[ choose_gene, ]
choose_matrix = t( scale( t( choose_matrix ) ) )
choose_matrix[choose_matrix > 2] = 2
choose_matrix[choose_matrix < -2] = -2
annotation_col = data.frame( CellType = factor( group ) )
rownames( annotation_col ) = colnames( targets )
choose_matrix <- na.omit(choose_matrix)
pheatmap( fontsize = 2, choose_matrix, annotation_col = annotation_col, show_rownames = F, annotation_legend = F, filename = "heatmap_BRCA_medianexp2.png")
}
install.packages('ggplot2')
library( "ggplot2" )
nrDEG <- gene_diff
logFC_cutoff <- with( nrDEG, mean( abs( logFC ) ) + 2 * sd( abs( logFC ) ) )
logFC_cutoff
logFC_cutoff = 1
{
nrDEG$change = as.factor( ifelse( nrDEG$PValue < 0.01 & abs(nrDEG$logFC) > logFC_cutoff,
ifelse( nrDEG$logFC > logFC_cutoff , 'UP', 'DOWN' ), 'NOT' ) )
save( nrDEG, file = "nrDEG_array_medianexp.Rdata" )
this_tile <- paste0( 'Cutoff for logFC is ', round( logFC_cutoff, 3 ),
' The number of up M/Z is ', nrow(nrDEG[ nrDEG$change =='UP', ] ),
' The number of down M/Z is ', nrow(nrDEG[ nrDEG$change =='DOWN', ] ) )
volcano = ggplot(data = nrDEG, aes( x = logFC, y = -log10(PValue), color = change)) +
geom_point( alpha = 0.4, size = 1.75) +
theme_set( theme_set( theme_minimal( base_size = 15 ) ) ) +
xlab( "log2 fold change" ) + ylab( "-log10 p-value" ) +
theme(legend.title = element_text(colour="black", size=6, face="bold")) +
theme(legend.text = element_text(colour="black", size = 7, face = "bold")) +
theme(axis.title.x = element_text(size = 9, color = "black", face = "bold")) +
theme(axis.title.y = element_text(size = 9, color = "black", face = "bold")) +
ggtitle( this_tile ) + theme( plot.title = element_text( size = 8, hjust = 0.5, face = "bold" )) +
theme(legend.position=c(1.2, 0.8)) +
theme(aspect.ratio=1) +
scale_colour_manual( values = c('green','black','red') ) + theme(panel.grid.major = element_line(colour = "white",
linetype = "blank"), panel.grid.minor = element_line(colour = "white"),
panel.background = element_rect(fill = "aliceblue",
colour = "white"), plot.background = element_rect(colour = "azure1"))
print( volcano )
ggsave( volcano, filename = 'volcano_BRCA_medianexp.tiff' )
dev.off()
}
幫老婆寫代謝組的熱圖和火山圖
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
- 文/潘曉璐 我一進(jìn)店門,熙熙樓的掌柜王于貴愁眉苦臉地迎上來仪或,“玉大人码耐,你說我怎么就攤上這事∪芷洌” “怎么了骚腥?”我有些...
- 文/不壞的土叔 我叫張陵,是天一觀的道長瓶逃。 經(jīng)常有香客問我束铭,道長,這世上最難降的妖魔是什么厢绝? 我笑而不...
- 正文 為了忘掉前任契沫,我火速辦了婚禮,結(jié)果婚禮上昔汉,老公的妹妹穿的比我還像新娘懈万。我一直安慰自己,他們只是感情好靶病,可當(dāng)我...
- 文/花漫 我一把揭開白布会通。 她就那樣靜靜地躺著,像睡著了一般娄周。 火紅的嫁衣襯著肌膚如雪涕侈。 梳的紋絲不亂的頭發(fā)上,一...
- 文/蒼蘭香墨 我猛地睜開眼郊闯,長吁一口氣:“原來是場噩夢啊……” “哼且轨!你這毒婦竟也來了?” 一聲冷哼從身側(cè)響起虚婿,我...
- 正文 年R本政府宣布,位于F島的核電站冯吓,受9級特大地震影響倘待,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜组贺,卻給世界環(huán)境...
- 文/蒙蒙 一凸舵、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧失尖,春花似錦啊奄、人聲如沸。這莊子的主人今日做“春日...
- 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至胧辽,卻和暖如春峻仇,著一層夾襖步出監(jiān)牢的瞬間公黑,已是汗流浹背邑商。 一陣腳步聲響...
推薦閱讀更多精彩內(nèi)容
- 測序行業(yè)的蓬勃發(fā)展奈附,帶來微生物組學(xué)日新月異的變化全度。目前,單一組學(xué)的文章不斷“貶值”斥滤,前沿研究的目光從單一組學(xué)逐步拓...
- 這個文章跟著之前的文章完整轉(zhuǎn)錄組RNAseq分析流程(tophat2+cufflink+cuffdiff)用了之前...
- 差異基因展示方式除了熱圖、火山圖挑胸,還有傳說中的“瀑布圖”(不確定是不是叫瀑布圖痒筒,我感覺更像S型圖),近兩天我在瀏覽...
- 16宿命:用概率思維提高你的勝算 以前的我是風(fēng)險厭惡者茬贵,不喜歡去冒險凸克,但是人生放棄了冒險,也就放棄了無數(shù)的可能闷沥。 ...