前言
本次主要使用Seurat比較STAR與Cellranger的輸出結(jié)果胶台,只會進行簡單的聚類工作臣咖。
數(shù)據(jù)讀入
> library(Seurat)
> library(dplyr)
> library(magrittr)
> library(gtools)
> library(stringr)
> library(Matrix)
> setwd("D://data/ScRNAcode")
##讀入STAR數(shù)據(jù)
> matrix.dir="STAR/"
> barcode.path <- paste0(matrix.dir,"barcodes.tsv")
> features.path <- paste0(matrix.dir,"features.tsv")
> matrix.path <- paste0(matrix.dir, "matrix.mtx")
> STARmatrix <- readMM(file = matrix.path)
> feature.names = read.delim(features.path,
+ header = FALSE,
+ stringsAsFactors = FALSE)
> barcode.names = read.delim(barcode.path,
+ header = FALSE,
+ stringsAsFactors = FALSE)
> colnames(STARmatrix) = barcode.names$V1
> rownames(STARmatrix) = feature.names$V2
> STARmatrix<-as.matrix(STARmatrix)
> STARmatrix[1:6,1:6]
##創(chuàng)建seurat對象
##創(chuàng)建STAR的Seurat對象
> STAR <- CreateSeuratObject(STARmatrix,project = "zsz",
min.cells = 3, min.features = 200)
##創(chuàng)建Cellranger的Seurat對象
> dir="cellranger/"
> counts <- Read10X(data.dir = dir)
> RANGER <- CreateSeuratObject(counts, project = "zsz",
min.cells=3, min.features = 200)
數(shù)據(jù)比較
> ##數(shù)據(jù)比較
> dim(STARmatrix)
[1] 33538 2048
> dim(counts)
[1] 33538 2112
> dim(STAR)
[1] 13350 2046
> dim(RANGER)
[1] 13314 2105
> fivenum(apply(STARmatrix,1,function(x) sum(x>0)))
MIR1302-2HG IGFBPL1 AL008723.2 FXYD7 MT-CO1
0 0 0 29 2047
> fivenum(apply(counts,1,function(x) sum(x>0)))
MIR1302-2HG FAM221B LINC01638 DGKE MT-CO1
0 0 0 29 2108
> pdf("box.pdf",height = 9,width = 9)
> boxplot(apply(STARmatrix,1,function(x) sum(x>0) ),main = "STAR",col = "lightgray")
> boxplot(apply(counts,1,function(x) sum(x>0) ),main = "Cellranger",col = "lightgray")
> dev.off()
RStudioGD
2
> pdf("hist.pdf",height = 9,width = 9)
> hist(apply(STARmatrix,2,function(x) sum(x>0) ),col = "lightgray",
+ breaks=20,xlim=c(0,4000),ylim=c(0,800),
+ labels=F,main="STAR",
+ xlab="genes",ylab="cells")
> abline(v=median(apply(STARmatrix,2,function(x) sum(x>0))),col='red')
> hist(apply(counts,2,function(x) sum(x>0) ),col = "lightgray",
+ breaks=20,xlim=c(0,4000),ylim=c(0,800),
+ labels=F,main="Cellranger",
+ xlab="genes",ylab="cells")
> abline(v=median(apply(counts,2,function(x) sum(x>0))),col='red')
> dev.off()
根據(jù)箱線圖踏幻,直方圖和矩陣的基本信息,可以看到STAR與cellranger的結(jié)果差距很小
質(zhì)量控制與聚類比較
> pdf("qc.pdf",height = 9,width = 9)
> VlnPlot(STAR,
+ features = c("nFeature_RNA", "nCount_RNA"),
+ pt.size = 0.1,
+ ncol = 2)
> VlnPlot(RANGER,
+ features = c("nFeature_RNA", "nCount_RNA"),
+ pt.size = 0.1,
+ ncol = 2)
> dev.off()
RStudioGD
2
> STAR<-subset(STAR,subset=nFeature_RNA>500 & nFeature_RNA<2000)
> STAR <- NormalizeData(STAR, normalization.method = "LogNormalize", scale.factor = 10000)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
> table(STAR@meta.data$orig.ident)
zsz
1896
> STAR <- FindVariableFeatures(STAR, selection.method = "vst", nfeatures = 2000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
> top10 <- head(VariableFeatures(STAR), 10)
> top10
[1] "S100A9" "S100A8" "IGLC2" "IGKC" "LYZ" "IGLC3" "CCL3" "NFKBIA" "PTGDS" "S100A12"
> RANGER<-subset(RANGER,subset=nFeature_RNA>500 & nFeature_RNA<2000)
> RANGER<- NormalizeData(RANGER, normalization.method = "LogNormalize", scale.factor = 10000)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
> table(RANGER@meta.data$orig.ident)
zsz
1895
> RANGER <- FindVariableFeatures(RANGER, selection.method = "vst", nfeatures = 2000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
> top10 <- head(VariableFeatures(RANGER), 10)
> top10
[1] "S100A9" "S100A8" "IGLC2" "IGKC" "LYZ" "CCL3" "NFKBIA" "IGLC3" "PTGDS" "S100A12"
> scale.genes <- rownames(STAR)
> STAR <- ScaleData(STAR, features = scale.genes)
> STAR <- RunPCA(STAR, features = VariableFeatures(STAR))
> plot1 <- ElbowPlot(STAR, ndims=30, reduction="pca")
> scale.genes <- rownames(RANGER)
> RANGER <- ScaleData(RANGER, features = scale.genes)
> RANGER <- RunPCA(RANGER, features = VariableFeatures(RANGER))
> plot2 <- ElbowPlot(RANGER, ndims=30, reduction="pca")
> plotc <- plot1+plot2
> ggsave("pca.pdf", plot = plotc, width = 8, height = 4)
> STAR <- FindNeighbors(STAR, dims = 1:10)
> STAR <- FindClusters(STAR, resolution = 0.8)
> table(STAR@meta.data$seurat_clusters)
0 1 2 3 4 5 6 7 8 9
368 310 290 217 184 129 119 112 92 75
> metadata <- STAR@meta.data
> cell_cluster <-data.frame(cell_ID=rownames(metadata),
cluster_ID=metadata$seurat_clusters)
> STAR <- RunUMAP(STAR, dims = 1:20)
> embed_tsne <- Embeddings(STAR, 'umap')
> plot1 = DimPlot(STAR, reduction = "umap" ,label = "T", pt.size = 1,label.size = 4)
> RANGER <- FindNeighbors(RANGER, dims = 1:10)
> RANGER <- FindClusters(RANGER, resolution = 0.8)
> table(RANGER@meta.data$seurat_clusters)
0 1 2 3 4 5 6 7 8 9
375 300 290 212 195 128 122 108 91 74
> metadata <- RANGER@meta.data
> cell_cluster <- data.frame(cell_ID=rownames(metadata), cluster_ID=metadata$seurat_clusters)
> RANGER <- RunUMAP(RANGER,n.neighbors = 30,dims = 1:20)
> embed_umap <- Embeddings(RANGER, 'umap')
> plot2 = DimPlot(RANGER, reduction = "umap" ,label = "T", pt.size = 1,label.size = 4)
> plotc <- plot1+plot2
> ggsave("umap.pdf", plot = plotc, width = 8, height = 4)
可以看到兩種分析方法umap與tsne聚類效果不太相同泽论,但是基本的聚類與分群是一致的
結(jié)語
就結(jié)果而言艾少,兩種分析方法的結(jié)果不完全相同,但也基本一致翼悴,本次筆記中用到的數(shù)據(jù)和代碼已上傳github缚够,在ScRNAseq_code/compare文件夾下,大家需要的可以下載試試
轉(zhuǎn)載請注明:周小釗的博客>>>單細胞實戰(zhàn)(4):STAR與cellranger結(jié)果比較