摘要：對Nature Methods上發(fā)表的文章Single-cell landscape of bronchoalveolar immune cells in patients with COVID-19進(jìn)行文章結(jié)果復(fù)現(xiàn)灶伊。

1 數(shù)據(jù)來源

GSE145926中包含了12例BALF樣本的單細(xì)胞測序數(shù)據(jù)以蕴，其中包括6例重癥感染患者樣本裹纳，3例中癥感染患者樣本喘批，3例健康對照樣本，另GSM3660650中包含了1例健康對照樣本尖啡。

2 數(shù)據(jù)質(zhì)控并整合

所有數(shù)據(jù)準(zhǔn)備好后铁瞒，讀入數(shù)據(jù)進(jìn)行質(zhì)控歌豺，之后對多樣本來源的數(shù)據(jù)進(jìn)行整合并去除批次效應(yīng)，主要R語言代碼如下：

library(Seurat)
library(Matrix)
library(dplyr)
rm(list=ls())
# 讀入元數(shù)據(jù)信息
samples <- read.delim2("meta.txt",header = TRUE, 
  stringsAsFactors = FALSE,check.names = FALSE, sep = "\t")

### 讀入表達(dá)數(shù)據(jù)并進(jìn)行質(zhì)控
nCoV.list <- list()
for(sample_s in samples$sample){
  print(sample_s)
  sample_i = samples %>% dplyr::filter(.,sample == sample_s)
  # GSM3660650是從GEO下載的表達(dá)矩陣文件，其他文件是10X的h5文件
  if (sample_s=="GSM3660650"){
    datadir = paste("GSE145926_RAW/GSM3660650/",sep="")
    sample.tmp <- Read10X(data.dir = datadir)
  }else{
    datadir = paste("GSE145926_RAW/",sample_s,"_filtered_feature_bc_matrix.h5",sep="")
    sample.tmp <- Read10X_h5(datadir)
  }
sample.tmp.seurat <- CreateSeuratObject(counts = sample.tmp, min.cells = 3, min.features = 200,project = sample_s)
sample.tmp.seurat[['percent.mito']] <- PercentageFeatureSet(sample.tmp.seurat, pattern = "^MT-")
#去除表達(dá)基因數(shù)量低于200或超過6000履因，UMI數(shù)量超過1000障簿，線粒體RNA比例超過10%的細(xì)胞
sample.tmp.seurat <- subset(x = sample.tmp.seurat, subset = nFeature_RNA > 200 & 
nFeature_RNA < 6000 & 
nCount_RNA > 1000 & 
percent.mito < 10)
sample.tmp.seurat <- NormalizeData(sample.tmp.seurat, verbose = FALSE)
sample.tmp.seurat <- FindVariableFeatures(sample.tmp.seurat, selection.method = "vst", 
nfeatures = 2000,verbose = FALSE)
nCoV.list[sample_s] = sample.tmp.seurat
}

### 使用Seurat3自帶的IntegrateData函數(shù)進(jìn)行整合并去除批次效應(yīng)，具體算法原理不詳
nCoV <- FindIntegrationAnchors(object.list = nCoV.list, dims = 1:50)
nCoV.integrated <- IntegrateData(anchorset = nCoV, dims = 1:50,features.to.integrate = rownames(nCoV))

### 添加樣本信息栅迄，主要包括病癥類型與對照等信息
sample_info = as.data.frame(colnames(nCoV.integrated))
colnames(sample_info) = c('ID')
rownames(sample_info) = sample_info$ID
sample_info$sample = nCoV.integrated@meta.data$orig.ident
sample_info = dplyr::left_join(sample_info,samples)
rownames(sample_info) = sample_info$ID
nCoV.integrated = AddMetaData(object = nCoV.integrated, metadata = sample_info)

### 對RNA矩陣進(jìn)行Normalize和Scale站故，以便進(jìn)行差異表達(dá)和可視化
DefaultAssay(nCoV.integrated) <- "RNA"
nCoV.integrated[['percent.mito']] <- PercentageFeatureSet(nCoV.integrated, pattern = "^MT-")
nCoV.integrated <- NormalizeData(nCoV.integrated, normalization.method = "LogNormalize", scale.factor = 1e4)
nCoV.integrated <- FindVariableFeatures(nCoV.integrated, selection.method = "vst", nfeatures = 2000,verbose = FALSE)
nCoV.integrated <- ScaleData(nCoV.integrated, verbose = FALSE, vars.to.regress = c("nCount_RNA", "percent.mito"))

3 聚類與降維

這部分程序主要實(shí)現(xiàn)表達(dá)數(shù)據(jù)的PCA分析、聚類分析毅舆、降維展示世蔗，以及差異表達(dá)分析鑒定細(xì)胞群的marker gene，R語言代碼如下：

### DefaultAssay設(shè)置為“integrated”矩陣并進(jìn)行下游分析朗兵，
DefaultAssay(nCoV.integrated) <- "integrated"
### 以下進(jìn)入Seurat標(biāo)準(zhǔn)分析流程
nCoV.integrated <- ScaleData(nCoV.integrated, verbose = FALSE, vars.to.regress = c("nCount_RNA", "percent.mito"))
nCoV.integrated <- RunPCA(nCoV.integrated, verbose = FALSE,npcs = 100)
nCoV.integrated <- ProjectDim(object = nCoV.integrated)
### 通過ElbowPlot選擇PC維度進(jìn)行下游降維和聚類
dpi = 300
png(file="pca.png", width = dpi*10, height = dpi*6, units = "px",res = dpi,type='cairo')
ElbowPlot(object = nCoV.integrated,ndims = 100)
dev.off()

Figure 1. PCA ElbowPlot.png

圖1顯示污淋，大約前20個PCA維度就可以捕獲數(shù)據(jù)的真實(shí)信號，文章中作者使用了前50個PCA維度進(jìn)行降維與聚類分析余掖。

### 聚類
nCoV.integrated <- FindNeighbors(object = nCoV.integrated, dims = 1:50)
nCoV.integrated <- FindClusters(object = nCoV.integrated, resolution = 1.2)

### 分別采用tSNE和umap兩種方法進(jìn)行降維展示
nCoV.integrated <- RunTSNE(object = nCoV.integrated, dims = 1:50)
nCoV.integrated <- RunUMAP(nCoV.integrated, reduction = "pca", dims = 1:50)

dpi=300
# umap展示
png(file="1-umap_1.png", width = dpi*8, height = dpi*6, units = "px",res = dpi,type='cairo')
pp = DimPlot(nCoV.integrated, reduction = 'umap',pt.size = 0.5,label = T,label.size = 5)
pp = pp + theme(axis.title = element_text(size = 15),
axis.text =  element_text(size = 15,family = 'sans'), 
legend.text = element_text(size = 15),
axis.line = element_line(size = 0.8))
print(pp)
dev.off()
# tsne展示
png(file="1-tsne_1.png", width = dpi*8, height = dpi*6, units = "px",res = dpi,type='cairo')
pp = DimPlot(nCoV.integrated, reduction = 'tsne',pt.size = 0.5,label = T,label.size = 5)
pp = pp + theme(axis.title = element_text(size = 15),
axis.text =  element_text(size = 15,family = 'sans'), 
legend.text = element_text(size = 15),
axis.line = element_line(size = 0.8))
print(pp)
dev.off()

Figure 2. UMAP and tSNE embedding.png

Figure2顯示寸爆，一共鑒定到31群細(xì)胞，與文章中Figure1A基本一致盐欺。

### 使用RNA矩陣進(jìn)行marker gene的鑒定赁豆，而不是批次矯正后的integrated矩陣，
###此處需要注意冗美，除Seurat之外魔种，其他多種差異表達(dá)分析的算法也推薦使用原始表達(dá)值，而不是批次矯正后的數(shù)值粉洼；
###此外节预，RNA矩陣用作文章中作圖數(shù)據(jù)來源。
###據(jù)此來看属韧，integrated矩陣僅僅是在去除批次效應(yīng)安拟、降維和聚類過程中用到，差異表達(dá)與數(shù)據(jù)可視化使用的都是RNA矩陣宵喂。
DefaultAssay(nCoV.integrated) <- "RNA"
nCoV.integrated@misc$markers <- FindAllMarkers(object = nCoV.integrated, assay = 'RNA',only.pos = TRUE, test.use = 'MAST')
write.table(nCoV.integrated@misc$markers,file='marker_MAST.txt',row.names = FALSE,quote = FALSE,sep = '\t')
### 挑選出每種細(xì)胞類型的marker gene糠赦，以及每個細(xì)胞群前30個表達(dá)最高的marker gene，加上variable feature锅棕，進(jìn)行scaleData分析拙泽，
###目的是為了在scale.data中添加部分感興趣的基因
markers = c('AGER','SFTPC','SCGB3A2','TPPP3','KRT5', 'FCGR3A','TREM2','KRT18',  #上皮系細(xì)胞
    'CD68','FCN1','CD1C','TPSB2','CD14','MARCO','CXCR2','CLEC9A','IL3RA',  #樹突細(xì)胞
    'CD3D','CD8A','KLRF1',  #T/NK細(xì)胞
    'CD79A','IGHG4','MS4A1',  #B細(xì)胞
    'VWF','DCN'  #內(nèi)皮細(xì)胞)
hc.markers = read.delim2("marker_MAST.txt",header = TRUE, stringsAsFactors = FALSE,check.names = FALSE, sep = "\t")
hc.markers %>% group_by(cluster) %>% top_n(n = 30, wt = avg_logFC) -> top30
var.genes = c(nCoV.integrated@assays$RNA@var.features,top30$gene,markers)
nCoV.integrated <- ScaleData(nCoV.integrated, verbose = FALSE, vars.to.regress = c("nCount_RNA", "percent.mito"),features = var.genes)
saveRDS(nCoV.integrated, file = "nCoV.rds")

4 細(xì)胞群注釋

這一步主要根據(jù)各個細(xì)胞群的marker gene表達(dá)情況確定其所屬的細(xì)胞類型，R語言代碼如下：

### marker gene umap熱圖
markers = c('TPPP3','KRT18','CD68','FCGR3B','CD1C','CLEC9A','LILRA4','TPSB2','CD3D','KLRD1','MS4A1','IGHG4')
png(file="1-umap_marker_1.png", width = dpi*16, height = dpi*10, units = "px",res = dpi,type='cairo')
pp_temp = FeaturePlot(object = nCoV.integrated, features = markers,cols = c("lightgrey","#ff0000"),combine = FALSE)
plots <- lapply(X = pp_temp, FUN = function(p) p + theme(axis.title = element_text(size = 22),
axis.text = element_text(size = 22), 
plot.title = element_text(family = 'sans',face='italic',size=22),
axis.line = element_line(size = 1.5),
axis.ticks = element_line(size = 1.2), 
legend.text = element_text(size = 22),
legend.key.height = unit(1.8,"line"),
legend.key.width = unit(1.2,"line")))
pp = CombinePlots(plots = plots,ncol = 4,legend = 'right')
print(pp)
dev.off()

Figure 3. UMAP projection of canonical markers for different cell types.png

#根據(jù)以上注釋結(jié)果對細(xì)胞群添加注釋信息
nCoV.integrated1 <- RenameIdents(nCoV.integrated, '12'='Epithelial','18'='Epithelial','26'='Epithelial','28'='Epithelial',
'0'='Macrophages','1'='Macrophages','2'='Macrophages','3'='Macrophages','4'='Macrophages','5'='Macrophages','6'='Macrophages','8'='Macrophages',
'10'='Macrophages','11'='Macrophages','15'='Macrophages','16'='Macrophages','20'='Macrophages','22'='Macrophages','23'='Macrophages','30'='Macrophages',
'29'='Mast','7'='T','9'='T','13'='T','19'='NK','24'='Plasma','25'='Plasma',
'14'='Neutrophil','17'='mDC','27'='pDC','21'='Doublets')
#計(jì)算各類細(xì)胞比例
nCoV.integrated1[["cluster"]] <- Idents(object = nCoV.integrated1)
big.cluster = nCoV.integrated1@meta.data
organ.summary = as.data.frame.matrix(table(big.cluster$sample,big.cluster$cluster))
organ.summary1 = organ.summary %>% select(c('Macrophages','Neutrophil','mDC','pDC','T','NK','Plasma','Mast'))
sample.percent <- round(organ.summary1 / rowSums(organ.summary1),3)

###添加分組信息
samples = read.delim2("../meta.txt",header = TRUE, stringsAsFactors = FALSE,check.names = FALSE, sep = "\t")
rownames(samples)=samples$sample
sample.percent$sample <- samples[rownames(sample.percent),'sample']
sample.percent$group <- samples[rownames(sample.percent),'group']

###作圖展示
pplist = list()
nCoV_groups = c('Macrophages','Neutrophil','mDC','pDC','T','NK','Plasma','Mast')
for(group_ in nCoV_groups){
    sample.percent_  = sample.percent %>% select(one_of(c('sample','group',group_)))
    colnames(sample.percent_) = c('sample','group','percent')
    sample.percent_$percent = as.numeric(sample.percent_$percent)
    sample.percent_ <- sample.percent_ %>% group_by(group) %>% mutate(upper =  quantile(percent, 0.75), lower = quantile(percent, 0.25),mean = mean(percent),median = median(percent))
    print(group_)
    print(sample.percent_$median)

    pp1 = ggplot(sample.percent_,aes(x=group,y=percent)) + geom_jitter(shape = 21,aes(fill=group),width = 0.25) + stat_summary(fun=mean, geom="point", color="grey60") +
    theme_cowplot() +
    theme(axis.text = element_text(size = 6),axis.title = element_text(size = 6),legend.text = element_text(size = 6),
    legend.title = element_text(size = 6),plot.title = element_text(size = 6,face = 'plain'),legend.position = 'none') + labs(title = group_,y='Percentage') +
    geom_errorbar(aes(ymin = lower, ymax = upper),col = "grey60",width =  0.25)

###組間t檢驗(yàn)分析
    labely = max(sample.percent_$percent)
    compare_means(percent ~ group,  data = sample.percent_)
    my_comparisons <- list( c("HC", "M"), c("M", "S"), c("HC", "S") )
    pp1 = pp1 + stat_compare_means(comparisons = my_comparisons,size = 1,method = "t.test")
    pplist[[group_]] = pp1
}
pdf(file="sample-percentage.pdf", width = 6, height = 3)
print(plot_grid(pplist[['Macrophages']],pplist[['Neutrophil']],pplist[['mDC']],pplist[['pDC']],pplist[['T']],pplist[['NK']],pplist[['Plasma']],pplist[['Mast']],ncol = 4, nrow = 2))
dev.off()

Figure 4. The proportions of the major BALF immune cell types in different groups.png

5 巨噬細(xì)胞分析

這一部分主要是對巨噬細(xì)胞進(jìn)行分析裸燎，R語言代碼如下：

###按照前面細(xì)胞marker gene顾瞻，我們鑒定到16群巨噬細(xì)胞，首先提取巨噬細(xì)胞子集
nCoV.integrated = readRDS(file = "../nCoV.rds")
Macrophage = subset(nCoV.integrated,idents = c('0','1','2','3','4','5','6','8','10','11','15','16','20','22','23','30'))

###將不同樣本的巨噬細(xì)胞分開顺少，之后通過IntegrateData矯正批次效應(yīng)
DefaultAssay(Macrophage) <- "RNA"
SubseMacrophages.list <- SplitObject(Macrophage, split.by = "sample")
for (i in 1:length(SubseMacrophages.list)) {
SubseMacrophages.list[[i]] <- NormalizeData(SubseMacrophages.list[[i]], verbose = FALSE)
SubseMacrophages.list[[i]] <- FindVariableFeatures(SubseMacrophages.list[[i]], selection.method = "vst", nfeatures = 2000,verbose = FALSE)
}
samples_name = c('C51','C52','C100','GSM3660650','C141','C142','C144','C143','C145','C146','C148','C149','C152')
reference.list <- SubseMacrophages.list[samples_name]
Macrophage.temp <- FindIntegrationAnchors(object.list = reference.list, dims = 1:50,k.filter = 115)
Macrophage.Integrated <- IntegrateData(anchorset = Macrophage.temp, dims = 1:50)

### 這部分代碼與前面相同朋其，先對整合后的RNA矩陣進(jìn)行Normalize和Scale
DefaultAssay(nCoV.integrated) <- "RNA"
nCoV.integrated[['percent.mito']] <- PercentageFeatureSet(nCoV.integrated, pattern = "^MT-")
nCoV.integrated <- NormalizeData(object = nCoV.integrated, normalization.method = "LogNormalize", scale.factor = 1e4)
nCoV.integrated <- FindVariableFeatures(object = nCoV.integrated, selection.method = "vst", nfeatures = 2000,verbose = FALSE)
nCoV.integrated <- ScaleData(nCoV.integrated, verbose = FALSE, vars.to.regress = c("nCount_RNA", "percent.mito"))

### DefaultAssay設(shè)置為“integrated”矩陣并進(jìn)行下游分析王浴，包括降維與聚類等
DefaultAssay(Macrophage.Integrated) <- "integrated"
Macrophage.Integrated <- ScaleData(Macrophage.Integrated, verbose = FALSE, vars.to.regress = c("nCount_RNA", "percent.mito"))
Macrophage.Integrated <- RunPCA(Macrophage.Integrated, verbose = FALSE)
#visulaization pca result
Macrophage.Integrated <- ProjectDim(object = Macrophage.Integrated)

###選擇前50個PCA維度進(jìn)行聚類，并采用tSNE和UMAP進(jìn)行降維展示
Macrophage.Integrated <- FindNeighbors(object = Macrophage.Integrated, dims = 1:50)
Macrophage.Integrated <- FindClusters(object = Macrophage.Integrated, resolution = 0.8)
Macrophage.Integrated <- RunTSNE(object = Macrophage.Integrated, dims = 1:50)
Macrophage.Integrated <- RunUMAP(Macrophage.Integrated, reduction = "pca", dims = 1:50)

###巨噬細(xì)胞UMAP展示
Macrophage.Integrated <- RunUMAP(Macrophage.Integrated, reduction = "pca", dims = 1:50)
png(file="2-umap-1.png", width = dpi*8, height = dpi*6, units = "px",res = dpi,type='cairo')
pp = DimPlot(object = Macrophage.Integrated, reduction = 'umap',label = T,pt.size = 0.8,label.size = 6,repel = TRUE)
pp = pp + theme(axis.title = element_text(size = 16),axis.text =  element_text(size = 16),
legend.text = element_text(size = 16),axis.line = element_line(size = 1))
print(pp)
dev.off()

Figure 5. The UMAP presentation of macrophages..png

在此我們共鑒定到18群巨噬細(xì)胞梅猿，與文章中的Supplementary Figure 2A基本一致氓辣。

###巨噬細(xì)胞marker gene熱圖
dpi = 300
markers = c('SPP1','FCN1','IL1B','MAFB','FABP4','MARCO','INHBA','TREM2')
png(file="umap_marker.png", width = dpi*13, height = dpi*6, units = "px",res = dpi,type='cairo')
pp_temp = FeaturePlot(object = Macrophage.Integrated, ncol = 4, features = markers,cols = c("lightgrey","#ff0000"),combine = FALSE,pt.size = 0.8)
plots <- lapply(X = pp_temp, FUN = function(p) p + theme(plot.title = element_text(family = 'sans',face='italic',size=16),legend.position = 'right') +
theme(axis.line = element_line(size = 0.8),axis.ticks = element_line(size = 0.8),
axis.text = element_text(size = 16),axis.title = element_text(size = 16),
legend.text = element_text(size =16)))
pp = CombinePlots(plots = plots,ncol = 4,legend = 'right')
print(pp)
dev.off()

Figure 6. UMAP plots showing the expression of several markers on BALF macrophages.png

據(jù)Figure 6可將巨噬細(xì)胞分為四組：FCN1hi（group 1），F(xiàn)CN1loSPP1+（group 2）袱蚓，SPP1+（group 3）钞啸，F(xiàn)ABP4+（group 4），在此基礎(chǔ)上進(jìn)行下游分析喇潘。

文章復(fù)現(xiàn) | 單細(xì)胞測序揭示COVID-19患者支氣管肺泡免疫細(xì)胞圖譜