RNASeq實(shí)戰(zhàn)練習(xí)-轉(zhuǎn)錄組基本圖片繪制
下文載入數(shù)據(jù)僅為測試畫圖用终畅,沒有研究意義
生物學(xué)重復(fù)平行性檢驗(yàn)
樣本相關(guān)性分析
定量得到的 countdata.csv 作為輸入數(shù)據(jù)
# R包安裝
install.packages("corrplot")
library(corrplot)
all <- read.csv('countdata.csv', row.names = 1, header = T)
# 將載入的數(shù)據(jù)轉(zhuǎn)化成數(shù)值型猬膨,否則計(jì)算相關(guān)系數(shù)的時(shí)候會(huì)報(bào)錯(cuò)
all <- as.data.frame(lapply(all, as.numeric))
# 計(jì)算相關(guān)系數(shù)
all1 <- log(all[1:6] + 1)
mf_cor <- cor(all1)
# 繪圖
pdf(file = "相關(guān)性.pdf")
corrplot(corr = mf_cor, method = "color", order = "original",tl.col = "black", addrect = 4,addCoef.col = "grey")
dev.off()
PCA主成分分析
ath <- read.csv('countdata.csv', row.names = 1, header = T)
coldata <- read.csv('coldata.csv')
ath1 <- log(ath[1:6] + 1)
ath1 <- t(ath1)
df_pca <- prcomp(ath1)
df_pcs <- data.frame(df_pca$x, group = coldata$condition)
# 畫出來了,但是不太懂代碼
library(ggplot2)
pdf(file = "PCA.pdf")
percentage <- round(df_pca$sdev / sum(df_pca$sdev) * 100, 2)
percentage <- paste(colnames(df_pcs),"(", paste(as.character(percentage), "%", ")", sep = ""))
ggplot(df_pcs,aes(x = PC1, y = PC2, color = group)) + geom_point(size = 4) + xlab(percentage[1]) + ylab(percentage[2])
dev.off()
表達(dá)差異分析
火山圖
將 DESeq2 分析得到的所有差異基因文件整理一下,刪除有 NA 的行,保存為 volcano.txt
library(ggplot2)
dataset1 <- read.table('volcano.txt', header = TRUE)
cut_off_pvalue = 0.01
cut_off_logFC = 1
pdf(file = "volcano.pdf")
ggplot(dataset1, aes(x = log2FoldChange, y = -log10(pvalue), colour = sig)) + geom_point() + scale_color_manual(values = c("#D6604D", "#d2dae2","#4393C3")) + geom_vline(xintercept = c(-1,1), lty = 4, col = "black", lwd = 0.8) + geom_hline(yintercept = -log10(cut_off_pvalue), lty = 4, col = "black", lwd = 0.8) + labs(x = "log2(fold change)", y = "-log10 (p-value)") + theme_bw() + theme(plot.title = element_text(hjust = 0.5), legend.position="right", legend.title = element_blank())
dev.off()
右圖是把左圖最高的那個(gè)給刪除重新畫的
聚類
R學(xué)習(xí)筆記(2):用pheatmap畫個(gè)熱圖
根據(jù) DESeq2 分析得到的差異表達(dá)基因 ID 從定量得到的 countdata.csv 中提取表達(dá)量數(shù)據(jù)吮螺,另存為 pheatmap.csv
install.packages("pheatmap")
library(pheatmap)
inputdata <- read.csv('pheatmap.csv', row.names = 1, header = T)
inputdata <- as.matrix(inputdata)
bk = unique(c(seq(0,7, length = 100)))
pheatmap(log(inputdata+1), breaks = bk, cluster_col = FALSE, legend_breaks = c(1:100), cellwidth = 60,cellheight = 9,border = FALSE)
多組差異表達(dá)分析比較
維恩圖
install.packages("VennDiagram")
library(VennDiagram)
venn <- read.csv('venn5.csv', header = T)
venn_ploy <- venn.diagram(x = list(WT_1 = venn$WT_1, WT_2 = venn$WT_2, WT_3 = venn$WT_3, NSR_1 = venn$NSR_1, NSR_2 = venn$NSR_2), filename = NULL, fill = c("dodgerblue", "goldenrod1", "darkorange1", "seagreen3", "orchid3"))
grid.draw(venn_ploy)
upset圖
為了畫圖隨便整理的數(shù)據(jù)欣簇,保存為 upset.csv
install.packages("UpSetR")
library(UpSetR)
gene <- read.csv('upset.csv', row.names = 1, header = T)
pdf(file = "upset.pdf")
upset(gene, nset = 6, nintersects = 100, order.by = c('freq', 'degree'), decreasing = c(TRUE, TRUE), queries = list(list(query = intersects, params = c('WT_1', 'WT_2', 'WT_3'), color = 'blue'), list(query = intersects, params = c('NSR_1','NSR_2','NSR_3'), color = 'red')))
dev.off()
GO富集分析
ggplot2中顯示坐標(biāo)軸_ggplot2|繪制GO富集柱形圖
library(ggplot2)
data <- read.csv('GO.csv', header = T)
data$name <- factor(data$name,levels=data$name)
CPCOLS <- c("#66C3A5", "#8DA1CB", "#FD8D62")
ggplot(data,aes(x = number, y = name, fill = type)) + geom_bar(stat = "identity", width = 0.8) + coord_flip() + theme_bw() + scale_fill_manual(values = CPCOLS) + xlab("GO term") + ylab("Num of Genes") + labs(title = "The Most Enriched GO Terms") + theme(axis.text.x = element_text(face = "bold",color = "gray50", angle = 70, vjust = 1, hjust = 1))
KEGG富集分析
library(ggplot2)
pathway <- read.csv("KEGG.csv",header = T)
ggplot(pathway, aes(pvalue, pathway)) + geom_point(aes(size = Count, color = -1*log10(qvalue))) + scale_color_gradient(low = "green", high = "red") + labs(color = expression(-log[10](qvalue)), size = "Gene", x = "Pvalue", y = "Pathway name", title = "Pathway enrichment") + theme_bw()