GSEA-analysis
1.加載數據
載入前一步分析得到的表達矩陣
library(ggstatsplot);
library(cowplot);
library(clusterProfiler);
library(stringr);
library(dplyr);
library(tidyr);
library(ggplot2);
library(ggstatsplot);
# Restore the objects saved by the previous analysis step.
load("GSE63067_GSEA.Rdata")
# Keep working on the loaded table under the name used by the rest of the script.
exprSet <- data_plot
# Peek at the top-left 3 x 3 corner of the table.
exprSet[seq_len(3), seq_len(3)]
## PAX8 CYP2A6 SCARB1
## GSM1539877 6.506860 11.94711 9.129116
## GSM1539878 6.313513 11.82544 9.402811
## GSM1539879 6.273058 11.42314 8.120977
2.批量相關性分析
將第一行目的基因跟其他行的編碼基因批量做相關性分析,得到相關性系數以及p值。
# Correlate every column of exprSet with the CCL20 column and collect the
# correlation estimate and p-value for each one.
y <- as.numeric(exprSet[, "CCL20"])
colnames <- colnames(exprSet)

# Preallocate the result table (one row per column of exprSet) instead of
# growing it cell by cell inside the loop.
cor_data_df <- data.frame(
  symbol = colnames,
  correlation = rep(NA_real_, length(colnames)),
  pvalue = rep(NA_real_, length(colnames))
)

# seq_along() keeps the loop from running when exprSet has no columns
# (1:length(x) would iterate over c(1, 0)).
for (i in seq_along(colnames)) {
  # cor.test() selects the coefficient through `method`; the previous
  # `type = "spearman"` was swallowed by `...`, so Pearson was computed.
  # NOTE(review): any printed values recorded after this block were
  # presumably produced with the Pearson default -- re-run to refresh them.
  test <- cor.test(as.numeric(exprSet[, i]), y, method = "spearman")
  cor_data_df$correlation[i] <- test$estimate
  cor_data_df$pvalue[i] <- test$p.value
}

# Inspect the structure of the result.
head(cor_data_df)
## symbol correlation pvalue
## 1 PAX8 -0.23354999 0.350963277
## 2 CYP2A6 -0.60172099 0.008244347
## 3 SCARB1 -0.19907443 0.428394688
## 4 TTLL12 -0.57277340 0.012974684
## 5 CYTOR 0.35144428 0.152686677
## 6 ADAM32 -0.01286106 0.959604984
3.篩選最相關的基因
篩選p值小于0.05,按照相關性系數絕對值選前500個的基因, 數量可以自己定。
# Keep rows with p-value below 0.05 ...
cor_data_sig <- dplyr::filter(cor_data_df, pvalue < 0.05)
# ... order them by absolute correlation, strongest first ...
cor_data_sig <- dplyr::arrange(cor_data_sig, dplyr::desc(abs(correlation)))
# ... and retain at most the first 500 rows.
cor_data_sig <- dplyr::slice(cor_data_sig, seq_len(500))
4.隨機選取正的和負的分別作圖驗證
正相關的選取IL2RG;負相關選取MARK1
# Positively correlated example: CCL20 plotted against IL2RG.
ggscatterstats(
  data = exprSet,
  x = IL2RG,
  y = CCL20,
  title = "Relationship between CCL20 and IL2RG",
  centrality.para = "mean",
  margins = "both",
  xfill = "#CC79A7",
  yfill = "#009E73",
  marginal.type = "histogram"
)
## Warning: This plot can't be further modified with `ggplot2` functions.
## In case you want a `ggplot` object, set `marginal = FALSE`.
# Negatively correlated example: CCL20 plotted against MARK1.
ggscatterstats(
  data = exprSet,
  y = CCL20,
  x = MARK1,
  centrality.para = "mean",
  margins = "both",
  xfill = "#CC79A7",
  yfill = "#009E73",
  marginal.type = "histogram",
  # The title previously named IL2RG (copy-paste slip); x is MARK1 here.
  title = "Relationship between CCL20 and MARK1"
)
## Warning: This plot can't be further modified with `ggplot2` functions.
## In case you want a `ggplot` object, set `marginal = FALSE`.
# The two plots can also be combined with cowplot; build the first panel.
p1 <- ggscatterstats(
  data = exprSet,
  x = IL2RG,
  y = CCL20,
  title = "Relationship between CCL20 and IL2RG",
  centrality.para = "mean",
  margins = "both",
  xfill = "#CC79A7",
  yfill = "#009E73",
  marginal.type = "histogram"
)
## Warning: This plot can't be further modified with `ggplot2` functions.
## In case you want a `ggplot` object, set `marginal = FALSE`.
# Second panel: CCL20 plotted against MARK1.
p2 <- ggscatterstats(
  data = exprSet,
  y = CCL20,
  x = MARK1,
  centrality.para = "mean",
  margins = "both",
  xfill = "#CC79A7",
  yfill = "#009E73",
  marginal.type = "histogram",
  # The title previously named IL2RG (copy-paste slip); x is MARK1 here.
  title = "Relationship between CCL20 and MARK1"
)
## Warning: This plot can't be further modified with `ggplot2` functions.
## In case you want a `ggplot` object, set `marginal = FALSE`.
# Place both panels side by side in one row, labelled "A" and "B".
plot_grid(
  plotlist = list(p1, p2),
  labels = c("A", "B"),
  nrow = 1
)
5.聚類分析
既然確定了相關性是正確的,那么用篩選的基因進行富集分析就可以反推這個基因的功能。
# Gene symbols of the selected genes, with surrounding whitespace removed.
gene <- str_trim(cor_data_sig$symbol, "both")
# Convert gene symbols to Entrez IDs; bitr() returns a data frame, so `gene`
# is a data frame (not a character vector) from here on.
gene <- bitr(
  gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = "org.Hs.eg.db"
)
# GO enrichment across all ontologies at once.
go <- enrichGO(gene = gene$ENTREZID, OrgDb = "org.Hs.eg.db", ont = "all")

# Because all three GO categories were computed together, they can be drawn
# in one figure, faceted by ontology. `scales` is spelled out in full rather
# than relying on partial matching of `scale`.
# Bar plot.
barplot(go, split = "ONTOLOGY") +
  facet_grid(ONTOLOGY ~ ., scales = "free")
# Dot (bubble) plot.
dotplot(go, split = "ONTOLOGY") +
  facet_grid(ONTOLOGY ~ ., scales = "free")
#
# 這時候,我們能推斷CCL20這個基因主要參與免疫調控和T細胞激活,細胞因子受體活性調節等功能,大致跟它本身的功能是一致的。
參考