2022-01-17 CCA校正批次效應

批次效應強時脑奠，用CCA進行校正

先用PCA圖查看不整合前的批次效應
原理在每個批次中找到相同的一組表達差異低的基因，用這組基因在每個亞群中的差異代表整體基因的差異箱锐，按照這個差異調(diào)整整體基因表達水平

# 該代碼用于理解并分析批次差異比較大的樣本躏尉，通過CCA算法將批次效應進行校正
# 并基于校正之后的數(shù)據(jù)進行下游分析

#加載分析使用的包
library(Seurat)
library(monocle)
library(devtools)
devtools::install_github("cole-trapnell-lab/monocle-release@develop")

library(monocle)
library(ggplot2)
library(cowplot)
library(Matrix)
library(dplyr)

#讀入原始表達數(shù)據(jù)
exp1<-read.table("./CCA_monocle2_data/01.CRC01-CRC02_500sc_Read_Counts.txt",
                 sep="\t",header=T,row.names=1)
colnames(exp1)<-paste(colnames(exp1),"read",sep="_")#第一個樣本叫read

exp2<-read.table("./CCA_monocle2_data/02.CRC03-04-06-09-10-11_688sc_UMI_Counts.txt",
                 sep="\t",header=T,row.names=1)
colnames(exp2)<-paste(colnames(exp2),"umi",sep="_") #第二個樣本叫umi

#不使用CCA去批次效應蓖救，直接合并分析
combined<-cbind(exp1,exp2)

#創(chuàng)建一個文件夾用于寫分析結(jié)果
result.name <- "cca_monocle_result"
if(!dir.exists(result.name)){
  dir.create(result.name)
}

#合并的樣本分析看批次效應
combined.seurat<- CreateSeuratObject(
  combined,
  project = "combined", 
  min.cells = 5,
  min.features = 200,
  names.field = 4,
  names.delim = "_")
combined.seurat[["percent.mt"]] <- PercentageFeatureSet(combined.seurat,pattern = "^mt-")
combined.seurat<-NormalizeData(combined.seurat,verbose = FALSE)
combined.seurat<- FindVariableFeatures(combined.seurat, selection.method = "vst",nfeatures = 2000)
combined.seurat <- ScaleData(combined.seurat, verbose = FALSE)
combined.seurat <- RunPCA(combined.seurat, npcs = 50, verbose = FALSE)
pdf(paste0("./",result.name,"/PCA-DimPlot-noCCA.pdf"),width = 5,height = 4)
DimPlot(combined.seurat, reduction = "pca", )
dev.off()

接下來用CCA校正洪规，需要校正幾個就創(chuàng)建幾個seurat對象


#創(chuàng)建Seurat對象，標準化及尋找高表達變異基因-exp1
exp1.seurat<- CreateSeuratObject(
  exp1,
  project = "exp1", 
  min.cells = 5,
  min.features = 200,
  names.field = 4,
  names.delim = "_")
exp1.seurat[["percent.mt"]] <- PercentageFeatureSet(exp1.seurat,pattern = "^mt-")
exp1.seurat<-NormalizeData(exp1.seurat,verbose = FALSE)
exp1.seurat<- FindVariableFeatures(exp1.seurat, selection.method = "vst",nfeatures = 2000)

#創(chuàng)建Seurat對象循捺，標準化尋找高表達變異基因-exp2
exp2.seurat<- CreateSeuratObject(
  exp2,
  project = "exp2", 
  min.cells = 5,
  min.features = 200,
  names.field = 4,
  names.delim = "_")
exp2.seurat[["percent.mt"]] <- PercentageFeatureSet(exp2.seurat,pattern = "^mt-")
exp2.seurat<-NormalizeData(exp2.seurat,verbose = FALSE)
exp2.seurat<- FindVariableFeatures(exp2.seurat, selection.method = "vst",nfeatures = 2000)

#整合2批數(shù)據(jù)(k.filter默認為200斩例，一定要小于最小細胞數(shù)，不然會報錯)
exp.anchors<- FindIntegrationAnchors(object.list = c(exp1.seurat,exp2.seurat), dims = 1:30)
exp.seurat <- IntegrateData(anchorset = exp.anchors, dims = 1:30)

#設置當前使用的Assay(不運行也行从橘，整合完以后默認使用整合后的Assay)
DefaultAssay(exp.seurat)<-"integrated"

#均一化
exp.seurat <- ScaleData(exp.seurat, verbose = FALSE)

#運行PCA
exp.seurat <- RunPCA(exp.seurat, npcs = 50, verbose = FALSE)

#PCA結(jié)果展示
pdf(paste0("./",result.name,"/PCA-DimPlot-CCA.pdf"),width = 5,height = 4)
DimPlot(exp.seurat, reduction = "pca")
dev.off()

#PCA結(jié)果展示 - 碎石圖
pdf(paste0("./",result.name,"/PCA-ElbowPlot.pdf"),width = 5,height = 4)
ElbowPlot(exp.seurat, ndim=50,reduction="pca")
dev.off()

#增加樣本以及批次的信息到metadata中
cell.name<-row.names(exp.seurat@meta.data)
cell.name<-strsplit(cell.name,"_")
sample<-c()
batch=c()
for (i in 1:length(cell.name)){sample<-c(sample,substring(cell.name[[i]][2],1,2));batch<-c(batch,cell.name[[i]][4])}
exp.seurat@meta.data[["sample"]]<-sample
exp.seurat@meta.data[["batch"]]<-batch
rm(cell.name,sample,batch,i)
View(exp.seurat@meta.data)

#確定用于細胞分群的PC
dim.use <- 1:20

#細胞分群TSNE算法
exp.seurat <- FindNeighbors(exp.seurat, dims = dim.use)
exp.seurat <- FindClusters(exp.seurat, resolution = 0.5)
exp.seurat <- RunTSNE(exp.seurat, dims = dim.use, do.fast = TRUE)

#展示TSNE分類結(jié)果
pdf(paste0("./",result.name,"/CellCluster-TSNEPlot_",max(dim.use),"PC.pdf"),width = 5,height = 4)
DimPlot(object = exp.seurat, pt.size=1,label = T)
dev.off()

#按照數(shù)據(jù)來源分組展示細胞異同-batch
pdf(paste0("./",result.name,"/CellCluster-TSNEPlot_batch_",max(dim.use),"PC.pdf"),width = 5,height = 4)
DimPlot(object = exp.seurat, group.by="batch", pt.size=1)
dev.off()

#按照數(shù)據(jù)來源分組展示細胞異同-sample
pdf(paste0("./",result.name,"/CellCluster-TSNEPlot_sample_",max(dim.use),"PC.pdf"),width = 5,height = 4)
DimPlot(object = exp.seurat, group.by="sample", pt.size=1)
dev.off()

#plot在同一張圖中
plot1 <- DimPlot(object = exp.seurat, pt.size=1,label = T)
plot2 <- DimPlot(object = exp.seurat, group.by="batch", pt.size=1)
plot3 <- DimPlot(object = exp.seurat, group.by="sample", pt.size=1)
pdf(paste0("./",result.name,"/Combined_plot_exp.seurat_",max(dim.use),"PC.pdf"),width = 14,height = 4)
CombinePlots(plots = list(plot1,plot2,plot3),ncol=3)
dev.off()
rm(plot1,plot2,plot3)

#計算marker基因
all.markers<-FindAllMarkers(exp.seurat, only.pos = TRUE, 
                            min.pct = 0.25, logfc.threshold = 0.25)

# 遍歷每一個cluster然后展示其中前4個基因
marker.sig <- all.markers %>% filter(p_val_adj <= 0.05)
for(cluster in unique(marker.sig$cluster)){
  cluster.markers <- FindMarkers(exp.seurat, ident.1 = cluster, min.pct = 0.25)
  cluster.markers <- as.data.frame(cluster.markers) %>% 
    mutate(Gene = rownames(cluster.markers))
  cl4.genes <- cluster.markers %>% arrange(desc(avg_logFC))
  cl4.genes <- cl4.genes[1:min(nrow(cl4.genes),4),"Gene"]
  
  #RidgePlot
  pvn<-RidgePlot(exp.seurat, features = cl4.genes, ncol = 2)
  pdf(paste0("./",result.name,"/MarkerGene-RidgePlot_cluster",cluster,"_tsne_",max(dim.use),"PC.pdf"),width = 7,height = 6)
  print(pvn)
  dev.off()
  
  #VlnPlot
  pvn <- VlnPlot(exp.seurat, features = cl4.genes)
  pdf(paste0("./",result.name,"/MarkerGene-VlnPlot_cluster",cluster,"_tsne_",max(dim.use),"PC.pdf"),width = 7,height = 6)
  print(pvn)
  dev.off()
  
  #Feather plot 
  pvn <- FeaturePlot(exp.seurat,features=cl4.genes)
  pdf(paste0("./",result.name,"/MarkerGene-FeaturePlot_cluster",cluster,"_tsne_",max(dim.use),"PC.pdf"),width = 7,height = 6)
  print(pvn)
  dev.off()
  
}
rm(cl4.genes,cluster.markers,pvn)

#熱圖展示Top marker基因
top10 <- marker.sig %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)

pdf(paste0("./",result.name,"/MarkerGene-Heatmap_all_cluster_tsne_",max(dim.use),"PC.pdf"),width = 8,height = 5)
DoHeatmap(exp.seurat, features = top10$gene,size = 2)
dev.off()
write.table(top10,file=paste0("./",result.name,"_top10_marker_genes_tsne_",max(dim.use),"PC.txt"),sep="\t",quote = F)

pdf(paste0("./",result.name,"/MarkerGene-DotPlot_all_cluster_tsne_",max(dim.use),"PC.pdf"),width = 100,height = 10)
DotPlot(exp.seurat, features = unique(top10$gene))+RotatedAxis()
dev.off()

?著作權歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者

人面猴
序言：七十年代末念赶，一起剝皮案震驚了整個濱河市，隨后出現(xiàn)的幾起案子恰力，更是在濱河造成了極大的恐慌叉谜，老刑警劉巖，帶你破解...
沈念sama閱讀 218,941評論 6贊 508
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件踩萎，死亡現(xiàn)場離奇詭異停局，居然都是意外死亡，警方通過查閱死者的電腦和手機香府，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 93,397評論 3贊 395
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進店門董栽，熙熙樓的掌柜王于貴愁眉苦臉地迎上來，“玉大人企孩，你說我怎么就攤上這事锭碳。” “怎么了勿璃？”我有些...
開封第一講書人閱讀 165,345評論 0贊 356
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵擒抛，是天一觀的道長。經(jīng)常有香客問我补疑，道長歧沪，這世上最難降的妖魔是什么？我笑而不...
開封第一講書人閱讀 58,851評論 1贊 295
?港島之戀（遺憾婚禮）
正文為了忘掉前任莲组，我火速辦了婚禮诊胞，結(jié)果婚禮上，老公的妹妹穿的比我還像新娘胁编。我一直安慰自己厢钧，他們只是感情好，可當我...
茶點故事閱讀 67,868評論 6贊 392
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布嬉橙。她就那樣靜靜地躺著早直，像睡著了一般。火紅的嫁衣襯著肌膚如雪市框。梳的紋絲不亂的頭發(fā)上霞扬，一...
開封第一講書人閱讀 51,688評論 1贊 305
城市分裂傳說
那天，我揣著相機與錄音，去河邊找鬼喻圃。笑死萤彩，一個胖子當著我的面吹牛，可吹牛的內(nèi)容都是我干的斧拍。我是一名探鬼主播雀扶，決...
沈念sama閱讀 40,414評論 3贊 418
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼，長吁一口氣：“原來是場噩夢啊……” “哼肆汹！你這毒婦竟也來了愚墓？” 一聲冷哼從身側(cè)響起，我...
開封第一講書人閱讀 39,319評論 0贊 276
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤昂勉，失蹤者是張志新（化名）和其女友劉穎浪册，沒想到半個月后，有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體岗照，經(jīng)...
沈念sama閱讀 45,775評論 1贊 315
?護林員之死
正文獨居荒郊野嶺守林人離奇死亡村象，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點故事閱讀 37,945評論 3贊 336
?白月光啟示錄
正文我和宋清朗相戀三年，在試婚紗的時候發(fā)現(xiàn)自己被綠了攒至。大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片厚者。...
茶點故事閱讀 40,096評論 1贊 350
活死人
序言：一個原本活蹦亂跳的男人離奇死亡，死狀恐怖嗓袱，靈堂內(nèi)的尸體忽然破棺而出籍救，到底是詐尸還是另有隱情习绢，我是刑警寧澤渠抹，帶...
沈念sama閱讀 35,789評論 5贊 346
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布，位于F島的核電站闪萄，受9級特大地震影響梧却，放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜败去，卻給世界環(huán)境...
茶點故事閱讀 41,437評論 3贊 331
男人毒藥：我在死后第九天來索命
文/蒙蒙一放航、第九天我趴在偏房一處隱蔽的房頂上張望。院中可真熱鬧圆裕，春花似錦广鳍、人聲如沸。這莊子的主人今日做“春日...
開封第一講書人閱讀 31,993評論 0贊 22
一樁弒父案赊时，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽。三九已至行拢，卻和暖如春祖秒，著一層夾襖步出監(jiān)牢的瞬間，已是汗流浹背。一陣腳步聲響...
開封第一講書人閱讀 33,107評論 1贊 271
情欲美人皮
我被黑心中介騙來泰國打工竭缝，沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留房维，地道東北人。一個月前我還...
沈念sama閱讀 48,308評論 3贊 372
代替公主和親
正文我出身青樓抬纸，卻偏偏與公主長得像咙俩，于是被迫代替她去往敵國和親。傳聞我的和親對象是個殘疾皇子湿故，可洞房花燭夜當晚...
茶點故事閱讀 45,037評論 2贊 355

2022-01-17 CCA校正批次效應

批次效應強時脑奠，用CCA進行校正

推薦閱讀更多精彩內(nèi)容