03 PCA和熱圖
rm(list = ls())
load(file = "step1output.Rdata")
load(file = "step2output.Rdata")
#輸入數(shù)據(jù):exp和group_list
#Principal Component Analysis
#http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials
dat=as.data.frame(t(exp))
library(FactoMineR)#畫主成分分析圖需要加載這兩個包
library(factoextra)
# pca的統(tǒng)一操作走起
dat.pca <- PCA(dat, graph = FALSE)
pca_plot <- fviz_pca_ind(dat.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind = group_list, # color by groups
#palette = c("#00AFBB", "#E7B800"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)
pca_plot
ggsave(plot = pca_plot,filename = paste0(gse,"PCA.png"))
save(pca_plot,file = "pca_plot.Rdata")
#熱圖
cg=names(tail(sort(apply(exp,1,sd)),1000))
n=exp[cg,]
#繪制熱圖
annotation_col=data.frame(group=group_list)
rownames(annotation_col)=colnames(n)
library(pheatmap)
pheatmap(n,
show_colnames =F,
show_rownames = F,
annotation_col=annotation_col,
scale = "row")
dev.off()
04差異分析葵第、增加探針名列遍希,探針I(yè)D轉(zhuǎn)換
rm(list = ls())
load(file = "step2output.Rdata")
#差異分析赃春,用limma包來做
#需要表達矩陣和group_list,不需要改
library(limma)
design=model.matrix(~group_list) #根據(jù)grouplist生成模型矩陣design
fit=lmFit(exp,design) #從exp得到fit
fit=eBayes(fit)#貝葉斯擬合
deg=topTable(fit,coef=2,number = Inf)#fit里面提取結(jié)果得到deg蝇摸,六列數(shù)據(jù)logFC和pvalue
#為deg數(shù)據(jù)框添加幾列 探針的行名對應(yīng)到基因ids
#1.加probe_id列夹孔,把行名變成一列#法一:$列名+賦值。法二如下
library(dplyr)
deg <- mutate(deg,probe_id=rownames(deg))
head(deg)
#2.加symbol列琅轧,火山圖要用
deg <- inner_join(deg,ids,by="probe_id")
head(deg)
#按照symbol列去重復(fù). 1隨機 2最大值 3 pingjun
deg <- deg[!duplicated(deg$symbol),]
#3.加change列,標記上下調(diào)基因
logFC_t=1
P.Value_t = 0.01
k1 = (deg$P.Value < P.Value_t)&(deg$logFC < -logFC_t)
k2 = (deg$P.Value < P.Value_t)&(deg$logFC > logFC_t)
#sum(k1)下調(diào)基因的個數(shù)
#ifelse的使用:滿足K1則為down伍绳,不滿足,嵌套下一個ifelse
change = ifelse(k1,"down",ifelse(k2,"up","stable"))
table(change)
deg <- mutate(deg,change)
head(deg)
#4.加ENTREZID列乍桂,用于富集分析(symbol轉(zhuǎn)entrezid冲杀,然后inner_join)
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
s2e <- bitr(deg$symbol,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)#人類
#org.Hs.eg.db人類對應(yīng)ID轉(zhuǎn)換效床,是用來提供其他物種http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
save(group_list,deg,logFC_t,P.Value_t,file = "step4output.Rdata")
05 火山圖和熱圖
rm(list = ls())
load(file = "step1output.Rdata")
load(file = "step4output.Rdata")
#1.火山圖----
library(dplyr)
library(ggplot2)
dat = deg
p <- ggplot(data = dat,
aes(x = logFC,
y = -log10(P.Value))) +
geom_point(alpha=0.4, size=3.5,
aes(color=change)) +
ylab("-log10(Pvalue)")+
scale_color_manual(values=c("blue", "grey","red"))+
geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",lwd=0.8) +
geom_hline(yintercept = -log10(P.Value_t),lty=4,col="black",lwd=0.8) +
theme_bw()
p
if(T){
#自選基因
for_label <- dat%>%
filter(symbol %in% c("TRPM3","SFRP1"))
}
if(F){
#p值最小的10個
for_label <- dat %>% head(10)
}
if(F) {
#p值最小的前3下調(diào)和前3上調(diào)
x1 = dat %>%
filter(change == "up") %>%
head(3)
x2 = dat %>%
filter(change == "down") %>%
head(3)
for_label = rbind(x1,x2)
}
volcano_plot <- p +
geom_point(size = 3, shape = 1, data = for_label) +
ggrepel::geom_label_repel(
aes(label = symbol),
data = for_label,
color="black"
)
volcano_plot
ggsave(plot = volcano_plot,filename = paste0(gse,"volcano.png"))
#2.差異基因熱圖----
load(file = 'step2output.Rdata')
if(F){
#全部差異基因
cg = deg$probe_id[deg$change !="stable"]
length(cg)
}else{
#取前30上調(diào)和前30下調(diào)
x=deg$logFC[deg$change !="stable"]
names(x)=deg$probe_id[deg$change !="stable"]
cg=c(names(head(sort(x),30)),names(tail(sort(x),30)))
length(cg)
}
n=exp[cg,]
dim(n)
#作熱圖
library(pheatmap)
annotation_col=data.frame(group=group_list)
rownames(annotation_col)=colnames(n)
library(ggplotify)
heatmap_plot <- as.ggplot(pheatmap(n,show_colnames =F,
show_rownames = F,
scale = "row",
#cluster_cols = F,
annotation_col=annotation_col))
heatmap_plot
ggsave(heatmap_plot,filename = paste0(gse,"heatmap.png"))
load("pca_plot.Rdata")
library(patchwork)
(pca_plot + volcano_plot +heatmap_plot)+ plot_annotation(tag_levels = "A")