示例數(shù)據(jù):GSE178911→GSM5400792→ SRR14915863~SRR14915866
- https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE178911
- wget 下載到linux服務(wù)器
https://sra-download.ncbi.nlm.nih.gov/traces/sra7/SRZ/014915/SRR14915863/201008_D0_scRNA_fastq_S1_L001_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra7/SRZ/014915/SRR14915863/201008_D0_scRNA_fastq_S1_L001_R2_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra30/SRZ/014915/SRR14915864/201008_D0_scRNA_fastq_S1_L002_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra30/SRZ/014915/SRR14915864/201008_D0_scRNA_fastq_S1_L002_R2_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra51/SRZ/014915/SRR14915865/201008_D0_scRNA_fastq_S1_L003_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra51/SRZ/014915/SRR14915865/201008_D0_scRNA_fastq_S1_L003_R2_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra67/SRZ/014915/SRR14915866/201008_D0_scRNA_fastq_S1_L004_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra67/SRZ/014915/SRR14915866/201008_D0_scRNA_fastq_S1_L004_R2_001.fastq.gz
1、cellranger比對(linux)
- 首先下載安裝cellranger
- https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest
- 解壓即可用~
-
其次下載該軟件團隊提供的配套的參考基因組文件,有人/鼠的盒延,按需下載、解壓弟跑、備用。
開始比對~~~
bin=~/biosoft/cellranger-6.0.2/bin/cellranger
db=./refdata-gex-GRCh38-2020-A
fq_dir=./fastq/GSM5400792
$bin count --id=201008_D0_scRNA_fastq \
--localcores=10 \
--transcriptome=$db \
--fastqs=$fq_dir \
--sample=201008_D0_scRNA_fastq \
--expect-cells=3000 \
--nosecondary
#比對時間視使用線程數(shù)而定
- 查看比對結(jié)果
out
文件夾
2防症、velocyto生成loom文件(linux)
- https://mp.weixin.qq.com/s/TJsdj7qesZGlvmEkNLAy0A
- 使用conda 構(gòu)建一個velocyto軟件分析環(huán)境
conda create -n velocyto
conda activate velocyto
conda install samtools
conda install numpy scipy cython numba matplotlib scikit-learn h5py click
pip install pysam
# `PyPI`安裝
pip install velocyto
- 下載基因組gtf注釋文件
需要兩個孟辑,一個是標(biāo)準(zhǔn)的基因組gtf注釋文件,在上一步下載的refdata-gex-GRCh38-2020-A
文件夾里已有蔫敲。
另一個是對repeat region注釋的gtf文件
http://velocyto.org/velocyto.py/tutorial/cli.html
You might want to mask expressed repetitive elements, since those count could constitute a confounding factor in the downstream analysis. To do so you would need to download an appropriate expressed repeat annotation (for example from UCSC genome browser and make sure to select GTF as output format).
https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=611454127_NtvlaW6xBSIRYJEBI0iRDEWisITa&clade=mammal&org=Human&db=0&hgta_group=allTracks&hgta_track=rmsk&hgta_table=rmsk&hgta_regionType=genome&position=&hgta_outputType=gff&hgta_outFileName=mm10_rmsk.gtf
- 開始分析
cellranger_outDir=201008_D0_scRNA_fastq
cellranger_gtf=refdata-gex-GRCh38-2020-A/genes/genes.gtf
rmsk_gtf=rmsk.gtf
ls -lh $rmsk_gtf $cellranger_outDir $cellranger_gtf
velocyto run10x -m $rmsk_gtf $cellranger_outDir $cellranger_gtf
#比較耗時
- 結(jié)果儲存在cellranger_outDir文件夾里的
velocyto/
子文件夾里
3饲嗽、Seurat標(biāo)準(zhǔn)流程分析(Rstudio)
- 使用Seurat包對
filtered_feature_bc_matrix
文件夾的三個文件(feature/barcode/matrix)進行標(biāo)準(zhǔn)流程分析 - 其中必須包括tSNE/uMAP降維,以及cluster/celltype分群注釋信息奈嘿。
- 可參看以前筆記貌虾,就不多做介紹了~
4 、velocyto.R速率分析裙犹、可視化(linux環(huán)境的R)
首先需要安裝velocyto.R包尽狠,比較有難度叶圃,好像不可以安裝在windows環(huán)境掺冠『芳埃可參考
https://blog.csdn.net/weixin_39568781/article/details/111249461-
然后把之前得到loom文件以及seurat對象都準(zhǔn)備到當(dāng)前工作目錄下
library(Seurat)
library(velocyto.R)
library(tidyverse)
# sce_all = readRDS("sce.rds")
# sce_all = SplitObject(sce_all, split.by = "orig.ident")
# sce_1 = sce_all[[1]]
sce_1 = readRDS("sce.rds")
sce_1@assays$RNA@counts[1:4,1:4]
velo_1 <- read.loom.matrices("201008_D0_scRNA_fastq.loom")
velo_1$spliced[1:4,1:4]
#以Seurat對象為標(biāo)準(zhǔn),統(tǒng)一loom文件里的細(xì)胞名和數(shù)量
colnames(velo_1$spliced)<-gsub("x","-1",colnames(velo_1$spliced))
colnames(velo_1$spliced)<-gsub("201008_D0_scRNA_fastq:","1_",colnames(velo_1$spliced))
colnames(velo_1$unspliced)<-colnames(velo_1$spliced)
colnames(velo_1$ambiguous)<-colnames(velo_1$spliced)
velo_1$spliced<-velo_1$spliced[,rownames(sce_1@meta.data)]
velo_1$unspliced<-velo_1$unspliced[,rownames(sce_1@meta.data)]
velo_1$ambiguous<-velo_1$ambiguous[,rownames(sce_1@meta.data)]
#速率分析
sp <- velo_1$spliced
unsp <- velo_1$unspliced
sce_1_umap <- sce_1@reductions$umap@cell.embeddings
ident.colors <- (scales::hue_pal())(n = length(x = levels(x = sce_1)))
names(x = ident.colors) <- levels(x = sce_1)
cell.colors <- ident.colors[Idents(object = sce_1)]
names(x = cell.colors) <- colnames(x = sce_1)
fit.quantile = 0.02
cell.dist <- as.dist(1-armaCor(t(sce_1@reductions$pca@cell.embeddings)))
rvel.cd <- gene.relative.velocity.estimates(sp,unsp,deltaT=2, kCells=10,
cell.dist=cell.dist,fit.quantile=fit.quantile,n.cores=24)
#可視化
Idents(sce_1) = "fine"
gg <- UMAPPlot(sce_1)
colors <- as.list(ggplot_build(gg)$data[[1]]$colour)
names(colors) <- rownames(sce_1_umap)
show.velocity.on.embedding.cor(sce_1_umap,rvel.cd,n=30,scale='sqrt',cell.colors=ac(colors,alpha=0.5),cex=0.8,arrow.scale=2,show.grid.flow=T,min.grid.cell.mass=1.0,grid.n=50,arrow.lwd=1,do.par=F,cell.border.alpha =0.1,USE_OPENMP=1,n.cores=24,main="Cell Velocity")
#比較耗時~~
在加載Seurat對象時,可以僅提取感興趣的亞群琐簇,進行分析~
- 關(guān)于RNA速率分析的背景渣叛、結(jié)果解讀之后再學(xué)習(xí)一下~蘑秽。但從上面的流程來收看,還是比較復(fù)雜的缀雳。