pseudotime
比較不同pseudotime分析工具
在許多情況下,我們都在研究細胞不斷變化的過程。例如,這包括在發育過程中發生的許多分化過程:在刺激後,細胞將從一種細胞類型轉變為另一種細胞類型。由於一些細胞在分化過程中比其他細胞進行得更快,因此每個snapshot可能包含在發育過程中不同位置的細胞。我們使用統計方法對細胞沿著一個或多個代表潛在發育軌跡的軌跡進行排序,這種排序被稱為偽時間。
library(SingleCellExperiment)
library(TSCAN)
library(M3Drop)
library(monocle)
library(destiny)
library(SLICER)
library(scater)
## Warning: package 'scater' was built under R version 3.5.2
library(ggplot2)
library(ggthemes)
library(ggbeeswarm)
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.5.2
# Fix the RNG seed so any randomised steps later in the script are repeatable.
set.seed(1234)
# Load the serialized dataset; the printed summary shows it is a
# SingleCellExperiment with `counts` and `logcounts` assays.
# NOTE(review): absolute, machine-specific path — adjust before running
# on another machine.
deng_SCE <- readRDS("D:/paopaoR/deng/deng-reads.rds")
# Auto-print the object summary.
deng_SCE
## class: SingleCellExperiment
## dim: 22431 268
## metadata(0):
## assays(2): counts logcounts
## rownames(22431): Hvcn1 Gbp7 ... Sox5 Alg11
## rowData names(10): feature_symbol is_feature_control ...
## total_counts log10_total_counts
## colnames(268): 16cell 16cell.1 ... zy.2 zy.3
## colData names(30): cell_type2 cell_type1 ... pct_counts_ERCC
## is_cell_control
## reducedDimNames(0):
## spikeNames(1): ERCC
# Recode the stage annotation as a factor whose levels run from "zy" to
# "lateblast", so plots list the stages in this order instead of
# alphabetically.
deng_SCE$cell_type2 <- factor(
  x = colData(deng_SCE)$cell_type2,
  levels = c(
    "zy",
    "early2cell", "mid2cell", "late2cell",
    "4cell", "8cell", "16cell",
    "earlyblast", "midblast", "lateblast"
  )
)
# Keep the ordered stage labels; they serve as the reference timepoints.
cellLabels <- colData(deng_SCE)$cell_type2
數據集由來自小鼠早期發育的10個不同時間點的268個細胞組成。細胞label可以作為真實時間標準來評價偽時間的準確性。
# Pull the raw count assay and relabel its columns with the stage labels
# (column names are therefore no longer unique per cell).
deng <- assay(deng_SCE, "counts")
colnames(deng) <- cellLabels

# Run PCA on the SingleCellExperiment and show the cells coloured by stage.
deng_SCE <- runPCA(deng_SCE)
plotPCA(deng_SCE, colour_by = "cell_type2")
## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.
image.png
# Store each cell's coordinate on the first column of the default reduced
# dimension result as a per-cell annotation.
deng_SCE$PC1 <- reducedDim(deng_SCE)[, 1]

# One row of jittered points per stage, positioned along PC1.
ggplot(
  data = as.data.frame(colData(deng_SCE)),
  mapping = aes(x = PC1, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "First principal component",
    y = "Timepoint",
    title = "Cells ordered by first principal component"
  )
image.png
TSCAN
# Preprocess the count matrix with TSCAN's own preprocessing step.
procdeng <- TSCAN::preprocess(deng)
# Relabel the cells with their column position so TSCAN's sample names can be
# mapped back to cell indices later. The labels are sized from `procdeng`
# itself (not `deng`) so they always match the matrix being labelled, and
# seq_len() stays well-defined for a zero-column matrix.
colnames(procdeng) <- seq_len(ncol(procdeng))
# Cluster the cells into 10 groups and draw TSCAN's cluster plot.
dengclust <- TSCAN::exprmclust(procdeng, clusternum = 10)
TSCAN::plotmclust(dengclust)
image.png
# Order the cells along the TSCAN trajectory. With orderonly = FALSE the
# result is used below as a table with `sample_name` and `Pseudotime` columns.
dengorderTSCAN <- TSCAN::TSCANorder(dengclust, orderonly = FALSE)
pseudotime_order_tscan <- as.character(dengorderTSCAN$sample_name)

# Cells that TSCAN leaves out of the ordering keep an NA pseudotime.
deng_SCE$pseudotime_order_tscan <- NA
# The sample names are the numeric column labels assigned before clustering.
# Convert them through the character vector: calling as.numeric() directly on
# a factor column would return level codes rather than the labels.
deng_SCE$pseudotime_order_tscan[as.numeric(pseudotime_order_tscan)] <-
  dengorderTSCAN$Pseudotime

# Show the true stage labels of the cells assigned to cluster 10.
cellLabels[dengclust$clusterid == 10]
## [1] late2cell late2cell late2cell late2cell late2cell late2cell late2cell
## [8] late2cell late2cell late2cell
## 10 Levels: zy early2cell mid2cell late2cell 4cell 8cell ... lateblast
# Compare TSCAN pseudotime with the true stage of each cell. Cells with an
# NA pseudotime are dropped by ggplot with a warning.
ggplot(
  data = as.data.frame(colData(deng_SCE)),
  mapping = aes(
    x = pseudotime_order_tscan,
    y = cell_type2,
    colour = cell_type2
  )
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "TSCAN pseudotime",
    y = "Timepoint",
    title = "Cells ordered by TSCAN pseudotime"
  )
## Warning: Removed 47 rows containing missing values (position_quasirandom).
image.png
MONOCLE
# Run M3Drop feature selection on the count matrix and keep the names of the
# selected genes as a character vector.
m3dGenes <- as.character(M3DropFeatureSelection(deng)[["Gene"]])
## Warning in bg__calc_variables(expr_mat): Warning: Removing 1134 undetected
## genes.
image.png
# Restrict the expression matrix to the M3Drop-selected genes, keeping only
# the first row for any duplicated gene name. drop = FALSE keeps `d` a matrix
# even if a single gene survives.
d <- deng[rownames(deng) %in% m3dGenes, , drop = FALSE]
d <- d[!duplicated(rownames(d)), , drop = FALSE]

# Replace cell and gene names with positional labels; the gene names are kept
# separately for the feature annotation. seq_len() is used instead of 1:n so
# the labels stay well-defined for an empty dimension.
colnames(d) <- seq_len(ncol(d))
geneNames <- rownames(d)
rownames(d) <- seq_len(nrow(d))

# Cell-level annotation (true timepoint) and gene-level annotation.
pd <- data.frame(timepoint = cellLabels)
pd <- new("AnnotatedDataFrame", data = pd)
fd <- data.frame(gene_short_name = geneNames)
fd <- new("AnnotatedDataFrame", data = fd)

# NOTE(review): `d` is derived from the raw count assay, yet the expression
# family is tobit(); monocle's documentation suggests negbinomial.size() for
# count data — confirm this choice is intentional.
dCellData <- newCellDataSet(
  d,
  phenoData = pd,
  featureData = fd,
  expressionFamily = tobit()
)
# Mark the selected genes (by row position) as the genes used for ordering.
dCellData <- setOrderingFilter(dCellData, which(geneNames %in% m3dGenes))
dCellData <- estimateSizeFactors(dCellData)
dCellDataSet <- reduceDimension(dCellData, pseudo_expr = 1)
## Warning in if (cds@expressionFamily@vfamily %in% c("negbinomial",
## "negbinomial.size")) {: 條件的長度大於一,因此只能用其第一元素
## Warning in if (cds@expressionFamily@vfamily == "binomialff") {: 條件的長度
## 大於一,因此只能用其第一元素
## Warning in if (cds@expressionFamily@vfamily == "Tobit") {: 條件的長度大於
## 一,因此只能用其第一元素
## Warning in if (cds@expressionFamily@vfamily == "uninormal") {: 條件的長度大
## 於一,因此只能用其第一元素
# Order the cells along the trajectory of the dimension-reduced data set
# (reverse = FALSE), then draw the resulting trajectory.
dCellDataSet <- orderCells(dCellDataSet, reverse = FALSE)
plot_cell_trajectory(dCellDataSet)
image.png
# Collect the true timepoint together with monocle's pseudotime and state for
# every cell.
pseudotime_monocle <-
  data.frame(
    Timepoint = phenoData(dCellDataSet)$timepoint,
    pseudotime = phenoData(dCellDataSet)$Pseudotime,
    State = phenoData(dCellDataSet)$State
  )
# Label the rows by cell position. The labels are sized from the table itself
# with seq_len(), which stays well-defined when the table is empty.
rownames(pseudotime_monocle) <- seq_len(nrow(pseudotime_monocle))

# Cell labels sorted by increasing pseudotime; index the row names directly
# instead of reordering the whole data frame just to read them.
pseudotime_order_monocle <-
  rownames(pseudotime_monocle)[order(pseudotime_monocle$pseudotime)]

# Attach the pseudotime to the SingleCellExperiment and compare it with the
# true stage of each cell.
deng_SCE$pseudotime_monocle <- pseudotime_monocle$pseudotime
ggplot(
  as.data.frame(colData(deng_SCE)),
  aes(x = pseudotime_monocle, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  xlab("monocle pseudotime") +
  ylab("Timepoint") +
  ggtitle("Cells ordered by monocle pseudotime")
image.png
Diffusion maps
# From here on `deng` holds the log-count assay (it previously held raw
# counts), again with columns relabelled by stage.
deng <- assay(deng_SCE, "logcounts")
colnames(deng) <- cellLabels

# Fit a diffusion map on the cells-by-genes matrix.
dm <- DiffusionMap(t(deng))

# First two diffusion components alongside the true stage, for plotting.
tmp <- data.frame(
  DC1 = eigenvectors(dm)[, 1],
  DC2 = eigenvectors(dm)[, 2],
  Timepoint = deng_SCE$cell_type2
)
ggplot(data = tmp, mapping = aes(x = DC1, y = DC2, colour = Timepoint)) +
  geom_point() +
  scale_color_tableau() +
  labs(x = "Diffusion component 1", y = "Diffusion component 2") +
  theme_classic()
image.png
# Use the rank of each cell along the first diffusion component as its
# pseudotime.
deng_SCE$pseudotime_diffusionmap <- rank(eigenvectors(dm)[, 1])

# Compare the diffusion-map pseudotime with the true stage of each cell.
ggplot(
  data = as.data.frame(colData(deng_SCE)),
  mapping = aes(
    x = pseudotime_diffusionmap,
    y = cell_type2,
    colour = cell_type2
  )
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "Diffusion map pseudotime (first diffusion map component)",
    y = "Timepoint",
    title = "Cells ordered by diffusion map pseudotime"
  )
image.png
SLICER
這個包不太常見。找到可能的start細胞後,branch分析有時候會報錯,GitHub上也有人遇到同樣的問題,換個細胞可能會運行成功,但是結果可能就不對了。
library("lle")
## Warning: package 'lle' was built under R version 3.5.2
## Warning: package 'snowfall' was built under R version 3.5.2
# Select the genes used to build the trajectory from the transposed
# (cells-by-genes) expression matrix.
# NOTE(review): presumably SLICER's select_genes()/select_k() — confirm.
genes <- select_genes(t(deng))
# Choose the neighbourhood size k, searching between kmin = 30 and kmax = 60.
k <- select_k(t(deng[genes,]), kmin = 30, kmax=60)
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
# Locally linear embedding of the cells on the selected genes with m = 2 and
# the chosen k; the `Y` element of the result is kept as the embedding.
traj_lle <- lle(t(deng[genes,]), m = 2, k)$Y
## finding neighbours
## calculating weights
## computing coordinates
# Register the LLE coordinates as a reduced dimension named "LLE" and plot
# the cells in that space, coloured by stage.
reducedDim(deng_SCE, "LLE") <- traj_lle
plotReducedDim(deng_SCE, use_dimred = "LLE", colour_by = "cell_type2") +
  labs(
    x = "LLE component 1",
    y = "LLE component 2",
    title = "Locally linear embedding of cells from SLICER"
  )
## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.
image.png
# Build a nearest-neighbour graph on the LLE coordinates and plot it.
# NOTE(review): the second argument (10) is presumably the number of
# neighbours — confirm against the SLICER documentation.
traj_graph <- conn_knn_graph(traj_lle, 10)
plot(traj_graph, main = "Fully connected kNN graph from SLICER")
image.png
# Find the extreme cells of the trajectory graph; the first one is used
# below as the starting cell.
ends <- find_extreme_cells(traj_graph, traj_lle)
image.png
# Use the first extreme cell as the trajectory root.
# NOTE(review): `start` masks stats::start(); the name is kept because later
# code may rely on it.
start <- ends[1]
pseudotime_order_slicer <- cell_order(traj_graph, start)
branches <- assign_branches(traj_graph, start)

# Per-cell table of true timepoint, pseudotime and branch. Cells that do not
# appear in the ordering keep an NA pseudotime.
pseudotime_slicer <-
  data.frame(
    Timepoint = cellLabels,
    pseudotime = NA_integer_,
    State = branches
  )
# A cell's pseudotime is its position in SLICER's ordering. seq_along() is
# used instead of 1:length() so an empty ordering assigns nothing.
pseudotime_slicer$pseudotime[pseudotime_order_slicer] <-
  seq_along(pseudotime_order_slicer)
deng_SCE$pseudotime_slicer <- pseudotime_slicer$pseudotime

# Compare SLICER pseudotime with the true stage of each cell. The duplicated
# theme_classic() call is removed and a title added to match the other
# pseudotime plots in this script.
ggplot(
  as.data.frame(colData(deng_SCE)),
  aes(x = pseudotime_slicer, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  xlab("SLICER pseudotime (cell ordering)") +
  ylab("Timepoint") +
  ggtitle("Cells ordered by SLICER pseudotime")
image.png