1.R包的下載
if (!requireNamespace("BiocManager", quietly=TRUE))
install.packages("BiocManager")
# To support paralell execution:
BiocManager::install(c("doMC", "doRNG","doSNOW"))
# For the main example:
BiocManager::install(c("mixtools", "GEOquery", "SummarizedExperiment"))
# For the examples in the follow-up section of the tutorial:
BiocManager::install(c("DT", "plotly", "NMF", "d3heatmap", "shiny", "rbokeh",
"dynamicTreeCut","R2HTML","Rtsne", "zoo"))
browseVignettes("AUCell")
2.需要兩個文件内颗,一個是seruat的表達(dá)矩陣,一個是基因集(MsigDB數(shù)據(jù)庫)
#加載R包
library(AUCell)
library(clusterProfiler)
library(ggplot2)
library(Seurat)
library(SeuratData)
library(msigdbr)
library(patchwork)
rm(list=ls())
#導(dǎo)入數(shù)據(jù)
#導(dǎo)入seurat對象
scRNA <- readRDS("scRNAsub.rds") #這個是我自己的單細(xì)胞數(shù)據(jù)
dim(scRNA)
cells_rankings <- AUCell_buildRankings(scRNA@assays$RNA@data) # 關(guān)鍵一步 細(xì)胞的表達(dá)矩陣
#制作基因集
#選擇GO-BP
a=msigdbr(species = "Homo sapiens",category = "C5",subcategory = c("BP"))
a = subset(a,select =c("gs_name","gene_symbol")) %>% as.data.frame()
#選擇GO-MF
b=msigdbr(species = "Homo sapiens",category = "C5",subcategory = c("MF"))
b = subset(b,select =c("gs_name","gene_symbol")) %>% as.data.frame()
#將二者合并
c <- rbind(a,b)
genesets = split(c$gene_symbol,c$gs_name)
#這樣就制作好了關(guān)于GO BP與MF的基因集(唉敦腔,從這里就可以看出我的水平了均澳,一言難盡......)
3.計算AUC: 耗時長
cells_AUC <- AUCell_calcAUC(genesets, cells_rankings, aucMaxRank=nrow(cells_rankings)*0.1)
4.找一些通路(該找哪些通路呢?) 用grep函數(shù)找
length(rownames(cells_AUC@assays@data$AUC)) #查看一下一共有多少條
data.frame(grep("METABOLIC",rownames(cells_AUC@assays@data$AUC),value = T)) #找與代謝相關(guān)的term
#選擇其中一條符衔,進(jìn)行plot ##set gene set of interest here for plotting
geneSet <- "GOBP_VITAMIN_B6_METABOLIC_PROCESS"
aucs <- as.numeric(getAUC(cells_AUC)[geneSet, ]) #提取這個通路在每一個細(xì)胞的得分
scRNA$AUC <- aucs #將得分添加入scRNA(seruat)對象
saveRDS(scRNA,"scRNAsub.rds") #保存數(shù)據(jù)
#選擇細(xì)胞展示的維度
df<- data.frame(scRNA@meta.data, scRNA@reductions$umap@cell.embeddings) #選擇用UMAP維度看 也可以選擇TSNE
head(df)
#我們看到每個細(xì)胞現(xiàn)在都加上AUC值了找前,下面做一下可視化。
5.plot
#做一個注釋文件 就好比要在什么層次對細(xì)胞進(jìn)行注釋
class_avg <- df %>%
group_by(celltype) %>% #這里可以改成cluster seurat_clusters/或者其他的annotation
summarise(
UMAP_1 = median(UMAP_1),
UMAP_2 = median(UMAP_2)
)
#通過ggplot畫圖
ggplot(df, aes(UMAP_1, UMAP_2)) +
geom_point(aes(colour = AUC)) + viridis::scale_color_viridis(option="A") +
ggrepel::geom_label_repel(aes(label = celltype),
data = class_avg,
size = 6,
label.size = 0,
segment.color = NA
)+ theme(legend.position = "none") + theme_bw()
6.references:
http://www.reibang.com/p/784a04c49873
http://www.reibang.com/p/2fb20f44da67