前面關于MetaNeighbor的帖子如蚜,我們學習了如何用MetaNeighbor來比較兩個數據集細胞類型注釋的一致性压恒。而用Seurat的FindIntegrationAnchors和IntegrateData函數,我們學習了如何跨物種對兩個seurat數據集進行整合错邦,今天結合這2個學習探赫,我們嘗試能否衡量兩個物種細胞類型注釋的一致性。(但是方法不一定正確和有效撬呢,也沒有進行合理性的評價)
數據集我們依舊選取的和前面一樣伦吠,拿arabidopsis和rice的單細胞數據來試試看(隨便從paper里面選了一組擬南芥根和水稻根的數據作為測試而已)。
對于測試數據集的構建魂拦,和前面的部分是一樣的毛仪。
library(Seurat)
library(SeuratData)
library(patchwork)
library(harmony)
#library(rliger)
library(RColorBrewer)
library(tidyverse)
library(reshape2)
library(ggsci)
library(ggstatsplot)
#常規(guī)的構建seurat對象
arabidopsis.data <- Read10X(data.dir = "arabidopsis/filtered_feature_bc_matrix/")
rice.data <- Read10X(data.dir = "rice/filtered_feature_bc_matrix/")
arabidopsis <- CreateSeuratObject(counts = arabidopsis.data, project = "arabidopsis", min.cells = 3, min.features = 200)
rice <- CreateSeuratObject(counts = rice.data, project = "rice", min.cells = 3, min.features = 200)
#做了一些簡單的過濾
arabidopsis[["percent.mg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATMG")
arabidopsis[["percent.cg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATCG")
arabidopsis <- subset(arabidopsis, subset = nFeature_RNA >500& nFeature_RNA <5000& percent.mg <1 & percent.cg <3)
rice<- subset(rice, subset = nFeature_RNA >500& nFeature_RNA <7000)
#下面就是常規(guī)的seurat的一些分析
arabidopsis<- NormalizeData(arabidopsis, normalization.method ="LogNormalize", scale.factor =10000)
arabidopsis <- FindVariableFeatures(arabidopsis, selection.method ="vst", nfeatures =5000)
all.genes <- rownames(arabidopsis)
arabidopsis <- ScaleData(arabidopsis, features = all.genes)
arabidopsis <- RunPCA(arabidopsis, features = VariableFeatures(object = arabidopsis))
arabidopsis <- FindNeighbors(arabidopsis, dims = 1:30)
arabidopsis <- FindClusters(arabidopsis, resolution = 0.5)
arabidopsis <- RunUMAP(arabidopsis, dims = 1:30)
arabidopsis <- RunTSNE(arabidopsis, dims = 1:30)
rice<- NormalizeData(rice, normalization.method ="LogNormalize", scale.factor =10000)
rice <- FindVariableFeatures(rice, selection.method ="vst", nfeatures =5000)
all.genes <- rownames(rice)
rice <- ScaleData(rice, features = all.genes)
rice <- RunPCA(rice, features = VariableFeatures(object = rice))
rice <- FindNeighbors(rice, dims = 1:30)
rice <- FindClusters(rice, resolution = 0.5)
rice <- RunUMAP(rice, dims = 1:30)
rice <- RunTSNE(rice, dims = 1:30)
#下面用一些常用的marker基因來標記每個cluster,簡單做一下芯勘,沒有深入對比
Phloem_companion_cell<-c("AT3G19550")
Quiescent_center<-c("AT1G69490","AT3G16150","AT3G53040")
Xylem<-c("AT1G68810")
Non_hair_cell<-c("AT1G68560","AT5G66800")
Cortex<-c("AT1G44970","AT3G12700","AT5G44130")
Epidermis<-c("AT2G16230")
Root_endodermis<-c("AT1G61590","AT4G02090","AT5G42180")
Phloem<-c("AT1G79430","AT4G03500")
Protophloem_sieve_element<-c("AT1G14730")
Protoxylem<-c("AT1G66810")
Trichoblast<-c("AT1G48930")
Lateral_root_cap<-c("AT4G36250","AT5G17520")
Stele<-c("AT4G37650")
all <- c(Phloem_companion_cell,Quiescent_center,Xylem,Non_hair_cell,Cortex,Epidermis,Root_endodermis,Phloem,Protophloem_sieve_element,Protoxylem,Trichoblast,Lateral_root_cap,Stele)
DotPlot(arabidopsis, features = all, cols = c("blue", "red"), dot.scale = 8) +RotatedAxis()
arabidopsis<- RenameIdents(arabidopsis, '0' = "Cortex", '1' = "Lateral root cap",
? ? ? ? ? ? '2' = "Non hair cell",'3' = "Cortex", '4' = "Cortex", '5' = "Non hair cell",
'6' = "Stele", '7' = "Xylem", '8' = "Trichoblast", '9' = "Quiescent center",
'10' = "Cortex", '11' = "Stele", '12' = "Quiescent center", '13' = "Quiescent center",
'14' = "Root endodermis",'15'="Root endodermis",'16'="Trichoblast",'17'="Root endodermis",'18'="Xylem",
'19'="Phloem",'20'="Protophloem sieve element",'21'="Phloem",'22'="Cortex") ?
DimPlot(arabidopsis, reduction = "tsne", label = TRUE)
list <- c("EXPA8","AT5G50200","OsPAP10c","OsSNDP1","PT2","CSLD1","OsEXPA17","OsbHLH115",
? ? ? ? ? "OsMST1","LSI2","OsADF3","AT1G33260","OsAAP6","LSI1","OsSCR2",
? ? ? ? ? "ERF131","AT1G51340","OsEMSA1","OsCLIP","AT3G02850","CESA7","BC6","OsSWN6",
? ? ? ? ? "OsHB4","OsFTIP1","NRAMP3","AT1G59750","Os03g0279200","AT3G45980","AT2G29570","OsRPA32",
? ? ? ? ? "AT1G20930","CycB1","ND1","AT3G25980","AT4G14330","AT1G63100","OsDLK","AT2G38905")
DotPlot(rice, features = all, cols = c("blue", "red"), dot.scale = 8) +RotatedAxis()
rice<- RenameIdents(rice, '0' = "Exodermis", '1' = "Root endodermis",
? ? ? ? ? ? '2' = "Meristem",'3' = "Root endodermis", '4' = "Vascular cylinder", '5' = "Vascular cylinder",
'6' = "Epidermis", '7' = "Epidermis", '8' = "Exodermis", '9' = "Vascular cylinder",
'10' = "Exodermis", '11' = "Meristem", '12' = "Root endodermis", '13' = "Vascular cylinder",
'14' = "Vascular cylinder",'15'="Vascular cylinder",'16'="Meristem",'17'="Pericyle",'18'="Vascular cylinder",
'19'="Epidermis",'20'="Exodermis")
DimPlot(rice, reduction = "tsne", label = TRUE)++NoLegend()
arabidopsis$celltype <- Idents(arabidopsis)
rice$celltype <- Idents(rice)
#獲得擬南芥和水稻共有基因的
common_genes <- rownames(arabidopsis)[rownames(arabidopsis) %in% rownames(rice)]
> length(common_genes)
[1] 9042
#提取共有的部分進行對比
arabidopsis <- arabidopsis[rownames(arabidopsis) %in% common_genes,]
rice <- rice[rownames(rice) %in% common_genes,]
#轉化成SCE格式的對象
arabidopsis.sce <- as.SingleCellExperiment(arabidopsis)
rice.sce <- as.SingleCellExperiment(rice)
下面箱靴,利用前面的MetaNeighbor來衡量擬南芥和水稻之間cell type的相似性。
#擬南芥的數據作為訓練集荷愕,當然也可以利用MetaNeighbor例子1的做法衡怀,把擬南芥和水稻作為一個整體。
pretrained_model <- MetaNeighbor::trainModel(var_genes = rownames(arabidopsis.sce),
? ? ? ? ? ? ? ? ? ? ? ? dat = arabidopsis.sce,
? ? ? ? ? ? ? ? ? ? ? ? study_id = arabidopsis.sce$orig.ident,
? ? ? ? ? ? ? ? ? ? ? ? cell_type = arabidopsis.sce$celltype)
write.table(pretrained_model,"pretrained_arabidopsis_celltype.txt")
arabidopsis_celltype <- read.table("pretrained_arabidopsis_celltype.txt",check.names=FALSE)
colnames(colData(rice.sce))
aurocs <- MetaNeighborUS(trained_model=arabidopsis_celltype ,
? ? ? ? ? ? ? ? ? ? ? ? dat = rice.sce,
? ? ? ? ? ? ? ? ? ? ? ? study_id = rice.sce$orig.ident,
? ? ? ? ? ? ? ? ? ? ? ? cell_type = rice.sce$celltype,
fast_version=TRUE)
plotHeatmapPretrained(aurocs)
aurocs里面就是擬南芥和水稻之間每個cell type之間的AUROC值安疗,可以認為一致性抛杨,也可以當做相似性(但是,因為我在這個測試數據里面只是大概粗糙的注釋了一下荐类,不具備太多實際的生物學意義)怖现。
我們還可以以桑基圖的形式展示各個細胞類型在2個物種之間的關系玉罐。