適用背景
單細(xì)胞轉(zhuǎn)錄組分析中如果涉及到發(fā)育或具有時(shí)間序列的細(xì)胞譜系,往往需要進(jìn)行擬時(shí)序分析,而最最最常用的擬時(shí)序分析軟件就是Monocle须教,如今它已經(jīng)出到了第三個(gè)版本monocle3。關(guān)于monocle3與monocle2之間的區(qū)別,可以參考這篇博客病袄,有興趣可以查看monocle3的官網(wǎng)教程搂赋。
安裝
建議使用conda 安裝
conda install -c conda-forge r-seurat
conda install -c conda-forge r-seuratobject
conda install -c conda-forge r-terra==1.5.12
conda install -c conda-forge r-units
conda install -c conda-forge r-raster
conda install -c conda-forge r-spdata
conda install -c conda-forge r-sf
conda install -c conda-forge r-spdep
conda install -c conda-forge r-ragg
conda install -c conda-forge r-ggrastr
conda install -c conda-forge libgit2
conda install -c conda-forge r-devtools
install.packages("BiocManager")
conda install -c anaconda cmake
在R里面進(jìn)行最后的安裝
BiocManager::install(c("nloptr", "lme4"))
if(!require(monocle3))devtools::install_github('cole-trapnell-lab/monocle3')
需要加載的R包
library(stringr)
library(ggplot2)
library(Seurat)
library(future)
library(rhdf5)
library(dplyr)
library(data.table)
library(Matrix)
library(rjson)
library(monocle3)
library(patchwork)
library(ComplexHeatmap)
library(RColorBrewer)
library(circlize)
運(yùn)行Monocle3的主函數(shù)
obj是Seurat的對(duì)象,注意需要有pca和umap降維信息益缠,assay選擇使用的assay脑奠,ex.umap默認(rèn)為F,使用Monocle3軟件運(yùn)行得到的umap幅慌,如果為T/TRUE宋欺,則使用Seurat對(duì)象的umap,group.by是細(xì)胞分組胰伍,label是輸出文件的前綴齿诞,默認(rèn)為’out’。
run_monocle3 <- function(obj,assay='RNA',ex.umap=F,group.by='seurat_clusters',label='out') {
all <- obj
all$ingroup <- all@meta.data[,group.by]
expression_matrix <- all@assays[assay][[1]]@counts
cell_metadata <- all@meta.data
gene_annotation <- data.frame(rownames(all), rownames(all))
rownames(gene_annotation) <- rownames(all)
colnames(gene_annotation) <- c("GeneSymbol", "gene_short_name")
NucSeq_cds <- new_cell_data_set(expression_matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
NucSeq_cds@int_colData$reducedDims$PCA<- all@reductions$pca@cell.embeddings
NucSeq_cds <- reduce_dimension(NucSeq_cds, reduction_method = 'UMAP', preprocess_method = "PCA")
NucSeq_cds$celltype <- NucSeq_cds$ingroup
jpeg('cds.umap.jpg')
p1 <- plot_cells(NucSeq_cds, reduction_method="UMAP", color_cells_by="celltype",show_trajectory_graph=F) + ggtitle('cds.umap')
print(p1)
dev.off()
NucSeq.coembed <- all
##import seurat umap
if(ex.umap){
cds.embed <- NucSeq_cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(NucSeq.coembed, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
NucSeq_cds@int_colData$reducedDims$UMAP <- int.embed
jpeg('int.umap.jpg')
p1 <- plot_cells(NucSeq_cds, reduction_method="UMAP", color_cells_by="celltype", ,show_trajectory_graph=F) + ggtitle('int.umap')
print(p1)
dev.off()
}
NucSeq_cds <- cluster_cells(NucSeq_cds, reduction_method='UMAP')
print(length(unique(partitions(NucSeq_cds))))
jpeg('partition.umap.jpg')
p1 <- plot_cells(NucSeq_cds, show_trajectory_graph = FALSE,group_label_size = 5,color_cells_by = "partition")
print(p1)
dev.off()
NucSeq_cds <- learn_graph(NucSeq_cds)
jpeg('celltype.umap.jpg')
p1 <- plot_cells(NucSeq_cds, color_cells_by = "celltype",
label_groups_by_cluster = FALSE, label_leaves = TRUE,
label_branch_points = TRUE,group_label_size = 5)
print(p1)
dev.off()
saveRDS(NucSeq_cds,paste0(label,".rds"))
meta <- data.frame(NucSeq_cds@colData@listData)
write.table(meta,paste0(label,"_metadata.xls"),sep='\t',quote=F)
}
選擇軌跡起點(diǎn)的函數(shù)
首先獲取細(xì)胞名列表骂租,以下是兩個(gè)例子:
cell_ids <- which(NucSeq_cds@clusters@listData[["UMAP"]][["clusters"]] == 5)
cell_ids <- which(cds$Celltypes == "OPCs")
NucSeq_cds是monocle3運(yùn)行完上面run_monocle3主函數(shù)得到的monocle3對(duì)象祷杈,cell_ids是軌跡起點(diǎn)細(xì)胞名向量,label是輸出文件的前綴菩咨,label_cell_groups吠式、label_leaves、label_branch_points抽米、graph_label_size和show_trajectory_graph是作圖設(shè)置的參數(shù)特占,可自調(diào)。
select_root <- function(NucSeq_cds,cell_ids,label='out', label_cell_groups=FALSE,label_leaves=FALSE,label_branch_points=FALSE,
graph_label_size=1.5,show_trajectory_graph = F) {
closest_vertex <- NucSeq_cds@principal_graph_aux[["UMAP"]]$pr_graph_cell_proj_closest_vertex
closest_vertex <- as.matrix(closest_vertex[colnames(NucSeq_cds), ])
root_pr_nodes <- igraph::V(principal_graph(NucSeq_cds)[["UMAP"]])$name[as.numeric(names(which.max(table(closest_vertex[cell_ids,]))))]
NucSeq_cds <- order_cells(NucSeq_cds,root_pr_nodes=root_pr_nodes)
jpeg(paste0(label,'_select_root_umap.jpg'))
p1 <- plot_cells(NucSeq_cds,color_cells_by = "pseudotime",
label_cell_groups=label_cell_groups,label_leaves=label_leaves,label_branch_points=label_branch_points,
graph_label_size=graph_label_size, show_trajectory_graph = show_trajectory_graph)
print(p1)
dev.off()
}
基因的擬時(shí)序作圖函數(shù)
NucSeq_cds是monocle3運(yùn)行完上面run_monocle3主函數(shù)得到的monocle3對(duì)象云茸,ingene是需要畫(huà)的基因向量是目。
select_gene_plot <- function(NucSeq_cds,ingene=NULL) {
jpeg(paste0('select_gene_umap.jpg'))
p1 <- plot_cells(NucSeq_cds,color_cells_by = "pseudotime",label_cell_groups=F,label_leaves=T,label_branch_points=T,graph_label_size=1.5)
print(p1)
dev.off()
for (g in ingene) {
jpeg(paste0('gene_',g,'_select_gene_umap.jpg'))
p1 <- plot_cells(NucSeq_cds,
genes=g,
label_cell_groups=FALSE,
show_trajectory_graph=FALSE)
print(p1)
dev.off()
}
}
計(jì)算擬時(shí)序軌跡的相關(guān)基因及熱圖可視化
運(yùn)行g(shù)raph_test時(shí)會(huì)出現(xiàn)‘Error rBind“報(bào)錯(cuò)
#去除不需要的細(xì)胞亞群
NucSeq_cds <- NucSeq_cds[,names(NucSeq_cds@clusters@listData[["UMAP"]][["clusters"]])[NucSeq_cds@clusters@listData[["UMAP"]][["clusters"]] != 10]]
#這里有個(gè)bug,需要將calculateLW中的Matrix::rBind改為rbind才能運(yùn)行g(shù)raph_test
trace('calculateLW', edit = T, where = asNamespace("monocle3"))
sig_gene <- graph_test(NucSeq_cds, neighbor_graph="principal_graph", cores=8)
如果不想每次手動(dòng)改标捺,可以以下步驟
1懊纳、運(yùn)行trace('calculateLW', edit = T, where = asNamespace("monocle3"))后會(huì)生成一個(gè)臨時(shí)的R腳本,將其復(fù)制到一個(gè)新的R腳本文件
2亡容、新的R腳本文件中修改tmp <- Matrix::rBind(tmp, cur_tmp) 為 tmp <- rbind(tmp, cur_tmp)(https://github.com/cole-trapnell-lab/monocle3/issues/512#issuecomment-1004224006)
3嗤疯、新的R腳本文件中將jaccard_coeff全部替換為monocle3:::jaccard_coeff
4、運(yùn)行trace('graph_test', edit = T, where = asNamespace("monocle3"))后復(fù)制代碼到新的R腳本文件中并把名字改為graph_test_new闺兢。
5茂缚、加載這個(gè)新腳本后,運(yùn)行g(shù)raph_test_new函數(shù)屋谭。
6脚囊、calculateLW函數(shù)還需要將my.moran.test改為monocle3:::my.moran.test,my.geary.test改為monocle3:::my.geary.test桐磁。
函數(shù)obtain_sigene中悔耘,NucSeq_cds是monocle3運(yùn)行完上面run_monocle3主函數(shù)得到的monocle3對(duì)象,obj之前輸入的Seurat對(duì)象我擂,neighbor_graph選擇使用的graph衬以,一般使用principal_graph就好缓艳,q_value.cutoff是q_value閾值,用于篩選基因泄鹏,一般設(shè)為0.01郎任,group.by是熱圖的細(xì)胞注釋。熱圖可根據(jù)這個(gè)[網(wǎng)址](https://jokergoo.github.io/ComplexHeatmap-reference/book/heatmap-annotations.html)進(jìn)行修改备籽。
obtain_sigene <- function(NucSeq_cds, obj, neighbor_graph="principal_graph", cores=8,q_value.cutoff=0.01,group.by=NULL) {
NucSeq.coembed <- obj
source('~/graph_test_new.R')
sig_gene <- graph_test_new(NucSeq_cds, neighbor_graph=neighbor_graph, cores=cores)
write.table(sig_gene,'sig_gene.xls',sep='\t',quote=F)
sig_gene <- subset(sig_gene, q_value < q_value.cutoff)
genes <- row.names(subset(sig_gene, morans_I >= quantile(sig_gene$morans_I)[4]))
pt.matrix <- GetAssayData(NucSeq.coembed,slot='data', assay='RNA')[match(genes,rownames(NucSeq.coembed)),order(pseudotime(NucSeq_cds))]
pt.matrix.raw <- pt.matrix
pt.matrix <- t(apply(pt.matrix,1,function(x){smooth.spline(x,df=3)$y}))
pt.matrix = pt.matrix[!apply(pt.matrix, 1, sd) == 0, ]
pt.matrix = Matrix::t(scale(Matrix::t(pt.matrix), center = TRUE))
pt.matrix = pt.matrix[is.na(row.names(pt.matrix)) == FALSE, ]
# pt.matrix[is.nan(pt.matrix)] = 0
pt.matrix[pt.matrix > 3] = 3
pt.matrix[pt.matrix < -3] = -3
row_dist <- as.dist((1 - cor(Matrix::t(pt.matrix)))/2)
# row_dist[is.na(row_dist)] <- 1
library(ComplexHeatmap)
library(RColorBrewer)
library(circlize)
lab2=HeatmapAnnotation(foo = NucSeq.coembed@meta.data[,group.by])
hthc <- Heatmap(
pt.matrix,
name = "z-score",
col = colorRamp2(seq(from=-3,to=3,length=11),rev(brewer.pal(11, "RdYlGn"))),
show_row_names = FALSE,
show_column_names = FALSE,
#row_names_gp = gpar(fontsize = 6),
clustering_distance_rows = row_dist,
clustering_method_rows = "ward.D2",
clustering_method_columns = "ward.D2",
row_title_rot = 0,
cluster_rows = TRUE,
cluster_row_slices = FALSE,
cluster_columns = FALSE,
right_annotation = NULL,
top_annotation = lab2)
jpeg('heatmap_sig_genes.jpg')
print(hthc)
dev.off()
}
graph_test_new.R的代碼
calculateLW <- function (cds, k, neighbor_graph, reduction_method, verbose = FALSE)
{
if (verbose) {
message("retrieve the matrices for Moran's I test...")
}
knn_res <- NULL
principal_g <- NULL
cell_coords <- reducedDims(cds)[[reduction_method]]
if (neighbor_graph == "knn") {
knn_res <- RANN::nn2(cell_coords, cell_coords, min(k +
1, nrow(cell_coords)), searchtype = "standard")[[1]]
}
else if (neighbor_graph == "principal_graph") {
pr_graph_node_coords <- cds@principal_graph_aux[[reduction_method]]$dp_mst
principal_g <- igraph::get.adjacency(cds@principal_graph[[reduction_method]])[colnames(pr_graph_node_coords),
colnames(pr_graph_node_coords)]
}
exprs_mat <- exprs(cds)
if (neighbor_graph == "knn") {
if (is.null(knn_res)) {
knn_res <- RANN::nn2(cell_coords, cell_coords, min(k +
1, nrow(cell_coords)), searchtype = "standard")[[1]]
}
links <- monocle3:::jaccard_coeff(knn_res[, -1], F)
links <- links[links[, 1] > 0, ]
relations <- as.data.frame(links)
colnames(relations) <- c("from", "to", "weight")
knn_res_graph <- igraph::graph.data.frame(relations,
directed = T)
knn_list <- lapply(1:nrow(knn_res), function(x) knn_res[x,
-1])
region_id_names <- colnames(cds)
id_map <- 1:ncol(cds)
names(id_map) <- id_map
points_selected <- 1:nrow(knn_res)
knn_list <- lapply(points_selected, function(x) id_map[as.character(knn_res[x,
-1])])
}
else if (neighbor_graph == "principal_graph") {
cell2pp_map <- cds@principal_graph_aux[[reduction_method]]$pr_graph_cell_proj_closest_vertex
if (is.null(cell2pp_map)) {
stop(paste("Error: projection matrix for each cell to principal",
"points doesn't exist, you may need to rerun learn_graph"))
}
cell2pp_map <- cell2pp_map[row.names(cell2pp_map) %in%
row.names(colData(cds)), , drop = FALSE]
cell2pp_map <- cell2pp_map[colnames(cds), ]
if (verbose) {
message("Identify connecting principal point pairs ...")
}
knn_res <- RANN::nn2(cell_coords, cell_coords, min(k +
1, nrow(cell_coords)), searchtype = "standard")[[1]]
principal_g_tmp <- principal_g
diag(principal_g_tmp) <- 1
cell_membership <- as.factor(cell2pp_map)
uniq_member <- sort(unique(cell_membership))
membership_matrix <- Matrix::sparse.model.matrix(~cell_membership +
0)
colnames(membership_matrix) <- levels(uniq_member)
feasible_space <- membership_matrix %*% Matrix::tcrossprod(principal_g_tmp[as.numeric(levels(uniq_member)),
as.numeric(levels(uniq_member))], membership_matrix)
links <- monocle3:::jaccard_coeff(knn_res[, -1], F)
links <- links[links[, 1] > 0, ]
relations <- as.data.frame(links)
colnames(relations) <- c("from", "to", "weight")
knn_res_graph <- igraph::graph.data.frame(relations,
directed = T)
tmp_a <- igraph::get.adjacency(knn_res_graph)
block_size <- 10000
num_blocks = ceiling(nrow(tmp_a)/block_size)
if (verbose) {
message("start calculating valid kNN graph ...")
}
tmp <- NULL
for (j in 1:num_blocks) {
if (j < num_blocks) {
block_a <- tmp_a[((((j - 1) * block_size) + 1):(j *
block_size)), ]
block_b <- feasible_space[((((j - 1) * block_size) +
1):(j * block_size)), ]
}
else {
block_a <- tmp_a[((((j - 1) * block_size) + 1):(nrow(tmp_a))),
]
block_b <- feasible_space[((((j - 1) * block_size) +
1):(nrow(tmp_a))), ]
}
cur_tmp <- block_a * block_b
if (is.null(tmp)) {
tmp <- cur_tmp
}
else {
tmp <- rbind(tmp, cur_tmp)
}
}
if (verbose) {
message("Calculating valid kNN graph, done ...")
}
region_id_names <- colnames(cds)
id_map <- 1:ncol(cds)
names(id_map) <- id_map
knn_list <- slam::rowapply_simple_triplet_matrix(slam::as.simple_triplet_matrix(tmp),
function(x) {
res <- which(as.numeric(x) > 0)
if (length(res) == 0)
res <- 0L
res
})
}
else {
stop("Error: unrecognized neighbor_graph option")
}
names(knn_list) <- id_map[names(knn_list)]
class(knn_list) <- "nb"
attr(knn_list, "region.id") <- region_id_names
attr(knn_list, "call") <- match.call()
lw <- spdep::nb2listw(knn_list, zero.policy = TRUE)
lw
}
graph_test_new <- function (cds, neighbor_graph = c("knn", "principal_graph"),
reduction_method = "UMAP", k = 25, method = c("Moran_I"),
alternative = "greater", expression_family = "quasipoisson",
cores = 1, verbose = FALSE)
{
neighbor_graph <- match.arg(neighbor_graph)
lw <- calculateLW(cds, k = k, verbose = verbose, neighbor_graph = neighbor_graph,
reduction_method = reduction_method)
if (verbose) {
message("Performing Moran's I test: ...")
}
exprs_mat <- SingleCellExperiment::counts(cds)[, attr(lw,
"region.id"), drop = FALSE]
sz <- size_factors(cds)[attr(lw, "region.id")]
wc <- spdep::spweights.constants(lw, zero.policy = TRUE,
adjust.n = TRUE)
test_res <- pbmcapply::pbmclapply(row.names(exprs_mat), FUN = function(x,
sz, alternative, method, expression_family) {
exprs_val <- exprs_mat[x, ]
if (expression_family %in% c("uninormal", "binomialff")) {
exprs_val <- exprs_val
}
else {
exprs_val <- log10(exprs_val/sz + 0.1)
}
test_res <- tryCatch({
if (method == "Moran_I") {
mt <- suppressWarnings(monocle3:::my.moran.test(exprs_val,
lw, wc, alternative = alternative))
data.frame(status = "OK", p_value = mt$p.value,
morans_test_statistic = mt$statistic, morans_I = mt$estimate[["Moran I statistic"]])
}
else if (method == "Geary_C") {
gt <- suppressWarnings(monocle3:::my.geary.test(exprs_val,
lw, wc, alternative = alternative))
data.frame(status = "OK", p_value = gt$p.value,
geary_test_statistic = gt$statistic, geary_C = gt$estimate[["Geary C statistic"]])
}
}, error = function(e) {
data.frame(status = "FAIL", p_value = NA, morans_test_statistic = NA,
morans_I = NA)
})
}, sz = sz, alternative = alternative, method = method, expression_family = expression_family,
mc.cores = cores, ignore.interactive = TRUE)
if (verbose) {
message("returning results: ...")
}
test_res <- do.call(rbind.data.frame, test_res)
row.names(test_res) <- row.names(cds)
test_res <- merge(test_res, rowData(cds), by = "row.names")
row.names(test_res) <- test_res[, 1]
test_res[, 1] <- NULL
test_res$q_value <- 1
test_res$q_value[which(test_res$status == "OK")] <- stats::p.adjust(subset(test_res,
status == "OK")[, "p_value"], method = "BH")
test_res$status = as.character(test_res$status)
test_res[row.names(cds), ]
}