與之前學(xué)過的AUCell一樣笤闯,Seurat里面自帶的函數(shù)AddModuleScore也可以給細胞打分惦银,我用之前拿的數(shù)據(jù)咆课,把AUCell換成AddModuleScore玩一下。
1.提取指定GOterm的基因
在這篇文章里提到扯俱,Reactive oxygen species (ROS) 活性氧相關(guān)的17個GO條目,可以把這些基因提取出來书蚪。使用msigdbr包搞定。Data_Sheet_2.XLSX是文章的附件
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7606976/bin/Data_Sheet_2.XLSX
rm(list = ls())
library(tidyverse)
dat = rio::import("Data_Sheet_2.XLSX")
g = dat[-1,1]
g
## [1] "GO_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [2] "GO_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [3] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [4] "GO_NEGATIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [5] "GO_NEGATIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [6] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [7] "GO_MITOCHONDRIAL_ELECTRON_TRANSPORT_CYTOCHROME_C_TO_OXYGEN"
## [8] "GO_SUPEROXIDE_GENERATING_NADPH_OXIDASE_ACTIVITY"
## [9] "GO_HYDROGEN_PEROXIDE_METABOLIC_PROCESS"
## [10] "GO_HYDROGEN_PEROXIDE_CATABOLIC_PROCESS"
## [11] "GO_PENTOSE_METABOLIC_PROCESS"
## [12] "GO_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
## [13] "GO_REGULATION_OF_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
## [14] "GO_CELLULAR_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
## [15] "GO_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [16] "GO_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [17] "GO_NEGATIVE_REGULATION_OF_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
17條term??
2.從msigdbr查找這些term
library(msigdbr)
GO_df = msigdbr(species = "Homo sapiens",category = "C5") %>%
dplyr::select(gene_symbol,gs_name,gs_subcat)
dim(GO_df)
## [1] 1424471 3
table(GO_df$gs_subcat)
##
## GO:BP GO:CC GO:MF HPO
## 721379 115769 122674 464649
GO_df = GO_df[GO_df$gs_subcat!="HPO",]
table(g %in% GO_df$gs_name)
##
## FALSE
## 17
全部匹配失敗啦迅栅?
其實是因為前綴不同
head(GO_df$gs_name,3)
## [1] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
## [2] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
## [3] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
head(g,3)
## [1] "GO_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [2] "GO_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [3] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
把前綴去掉來匹配善炫,順便把大寫變成小寫,變得易讀一點库继。
sn = GO_df$gs_name %>%
str_to_lower()%>%
str_replace("_","http:///") %>%
str_split("http:///",simplify = T)%>%
.[,2] %>%
str_replace_all("_"," ")
head(sn)
## [1] "10 formyltetrahydrofolate metabolic process"
## [2] "10 formyltetrahydrofolate metabolic process"
## [3] "10 formyltetrahydrofolate metabolic process"
## [4] "10 formyltetrahydrofolate metabolic process"
## [5] "10 formyltetrahydrofolate metabolic process"
## [6] "10 formyltetrahydrofolate metabolic process"
GO_df = mutate(GO_df,short_name = sn)
g = str_to_lower(g)%>%
str_replace("_","http:///") %>%
str_split("http:///",simplify = T)%>%
.[,2] %>%
str_replace_all("_"," ")
head(g)
## [1] "reactive oxygen species biosynthetic process"
## [2] "reactive oxygen species metabolic process"
## [3] "positive regulation of reactive oxygen species biosynthetic process"
## [4] "negative regulation of reactive oxygen species biosynthetic process"
## [5] "negative regulation of reactive oxygen species metabolic process"
## [6] "positive regulation of reactive oxygen species metabolic process"
table(g %in% GO_df$short_name)
##
## FALSE TRUE
## 1 16
經(jīng)過數(shù)據(jù)框搜索箩艺,發(fā)現(xiàn)匹配不上的那個其實是因為少了倆空格。宪萄。艺谆。
所以直接改一下就好。
“superoxide generating nadph oxidase activity”
“superoxide generating nad p h oxidase activity”
g[!(g %in% GO_df$short_name)] = "superoxide generating nad p h oxidase activity"
table(g %in% GO_df$short_name)
##
## TRUE
## 17
rosgene = GO_df %>%
filter(short_name %in% g) %>%
pull(gene_symbol) %>%
unique()
length(rosgene)
## [1] 402
2.打分
與之前做過的Aucell不同拜英,這個是seurat自帶的函數(shù)静汤,使用起來更加簡單
sce.all是seurat+singleR得出的對象,markers是findallmarkers得到的數(shù)據(jù)框居凶。
load("sce.all.Rdata")
load("makers.Rdata")
k = markers$p_val_adj < 0.05 &
abs(markers$avg_log2FC) > 1.5 &
markers$pct.1 > 0.5 &
markers$pct.2 < 0.5
table(k)
## k
## FALSE TRUE
## 13100 1483
gi = intersect(markers[k,"gene"],rosgene)
#32個
library(Seurat)
DotPlot(sce.all,features = gi,cols = "RdYlBu",
group.by = "RNA_snn_res.0.5")+RotatedAxis()
sce.all = AddModuleScore(object = sce.all,list(gi))
colnames(sce.all@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "patient"
## [5] "time" "sample_ID" "clst" "TSNE_x"
## [9] "TSNE_y" "percent.mt" "percent.hb" "RNA_snn_res.0.5"
## [13] "seurat_clusters" "Cluster1"
這個分數(shù)是直接加到meta.data里面的咯虫给。列名也有默認,叫Cluster1.
3.FeaturePlot可視化
標準版:
library(Seurat)
FeaturePlot(sce.all,features = "Cluster1")
定制版:
dat<- data.frame(sce.all@meta.data,
sce.all@reductions$umap@cell.embeddings,
sce.all@active.ident)
colnames(dat)[ncol(dat)] = "seurat_annotation"
class_avg <- dat %>%
group_by(seurat_annotation) %>%
summarise(
UMAP_1 = median(UMAP_1),
UMAP_2 = median(UMAP_2)
)
library(ggpubr)
ggplot(dat, aes(UMAP_1, UMAP_2)) +
geom_point(aes(colour = Cluster1)) + viridis::scale_color_viridis(option="A") +
ggrepel::geom_label_repel(aes(label = seurat_annotation),
data = class_avg,
label.size = 0,
segment.color = NA)+
theme(legend.position = "none") + theme_bw()
dat$score_group = ifelse(dat$Cluster1>median(dat$Cluster1),"active","non_active")
4.活躍和不活躍組的細胞比例條形圖
ggplot(dat,aes(score_group,fill = seurat_annotation))+
geom_bar(position = "fill", alpha = 0.9)+
scale_fill_brewer(palette = "Set1")+
theme_classic()