1.小提琴圖標注p值
(接lesson4)
仍以基因Rbp1的表達為例
#提取Rbp1的表達量并與metadata中其他信息合并,使metadata中增加一列表示每個細胞的Rbp1表達量
scFURIN =subset(scRNA1,Rbp1>0)
FURIN_exp=scFURIN@assays$RNA@data["Rbp1",]
FURIN_exp.m =scFURIN@meta.data
FURIN_exp.m$exp=FURIN_exp
FURIN_exp.m$Cohort=factor(FURIN_exp.m$orig.ident,levels=c("dapi1","dapi2","CAF1","CAF2"))
#計算p值
library(ggsignif)
#兩兩計算的分組
my_comparisons=list(c("DapiNeg1","CAF1"),c("DapiNeg2",'CAF2'),
c("DapiNeg2" ,"CAF1"))
#可視化
ggplot(FURIN_exp.m,aes(Cohort,exp,fill=Cohort))+
geom_violin()+theme_classic()+
theme(plot.title = element_text(size=15,face='bold'),
axis.text.x = element_text(size=15,face='bold',angle=25,hjust=1,vjust=1),
axis.text.y = element_text(size=18,face='bold'),
axis.title.x =element_text(size=0,vjust=1,face='bold'),
axis.title.y = element_text(size=15,face='bold')) +
labs(y='Expression',x="Group")+
geom_signif(comparisons=my_comparisons,step_increase=0.1,map_signif_level=F,test=t.test,size=1,textsize=6)+
geom_jitter(shape=16,position=position_jitter(0.2))+
ggtitle("Rbp1")+theme(plot.title=element_text(size=18,hjust=0.5))+
guides(fill=guide_legend(title=' '))+
theme(axis.line.x=element_line(linetype=1,color="black",size=1))+
theme(axis.line.y=element_line(linetype=1,color="black",size=1))+
theme(legend.text=element_text(size=15))
2.比例圖標注p值
畫比例圖
加載數(shù)據(jù)與r package
##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())
setwd("C:/Users/86269/Desktop/shun.C/single_cell")
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
load("sc.combined.rdata")
為畫圖挑選顏色
library(RColorBrewer)
qual_col_pals=brewer.pal.info[brewer.pal.info$category=='qual',]
col_vector = unlist(mapply(brewer.pal,qual_col_pals$maxcolors,rownames(qual_col_pals)))
提取細胞的樣品來源、cluster以及數(shù)量
library(reshape2)
pB2_df <- table(sc.combined@meta.data$new.celltype.2,sc.combined@meta.data$type) %>% melt()
colnames(pB2_df) <- c("Cluster","Sample","Number")
sample_color <- col_vector[1:9]
可視化
pB2 <- ggplot(data = pB2_df,aes(x=Cluster,y=Number,fill=Sample))+
geom_bar(stat="identity",width=0.8,position="fill")+
scale_fill_manual(values=sample_color)+
theme_bw()+
theme(panel.grid=element_blank())+
labs(x="",y="Ratio")+
theme(axis.text.y=element_text(size=12,colour="black"))+
theme(axis.text.x=element_text(hjust=1,vjust=1,angle=45))
pB2
該圖存在的問題是症见,三種sample的細胞總數(shù)不同喂走,因此正常情況下必須從不同sample中隨機抽取同等數(shù)量的細胞作比例圖才有意義。但是此處三種sample中細胞總數(shù)分別為
數(shù)量差別不大谋作,因此可以省去隨機抽樣這一步
對換橫縱坐標
pB3 <- ggplot(data=pB2_df,aes(x=Number,y=Cluster,fill=Sample))+
geom_bar(stat='identity',width=0.8,position='fill')+
scale_fill_manual(values=sample_color)+
theme_bw()+
theme(panel.grid=element_blank())+
labs(x="",y="Ratio")+
theme(axis.text.y=element_text(size=12,colour="black"))+
theme(axis.text.x=element_text(size=12,colour="black"))+
theme(
axis.text.x.bottom =element_text(hjust=1,vjust=1,angle=45)
)
pB3
將填充顏色對應為細胞種類(即cluster)
pB4 <- ggplot(data=pB2_df,aes(x=Number,y=Sample,fill=Cluster))+
geom_bar(stat='identity',width=0.8,position='fill')+
scale_fill_manual(values=col_vector[1:20])+
theme_bw()+
theme(panel.grid=element_blank())+
labs(x="",y="Ratio")+
theme(axis.text.y=element_text(size=12,colour="black"))+
theme(axis.text.x=element_text(size=12,colour="black"))+
theme(
axis.text.x.bottom =element_text(hjust=1,vjust=1,angle=45)
)
pB4
在圖中標注誤差線
#計算細胞比例
x <- table(sc.combined@meta.data$new.celltype.2,sc.combined@meta.data$orig.ident)
#計算每個sample中每種細胞的占比
x3<-t(t(x)/rowSums(t(x)))
x4<-as.data.frame(as.table(t(x3)))
colnames(x4)=c("sample","celltype","Freq")
x4$group =x4$sample %>%str_replace("_.","")
#計算誤差線上下值
top<-function(x){
return(mean(x)+sd(x)/sqrt(length(x)))
}
bottom <- function(x){
return(mean(x)-sd(x)/sqrt(length(x)))
}
#每個細胞類型在各分組中的情況芋肠,以上皮細胞為例
dose_epi <- x4[which(x4$celltype=="Epithelial"),]
ggplot(data=dose_epi,aes(x=group,y=Freq,fill=group))+
stat_summary(geom="bar",fun='mean',
position=position_dodge(0.9))+
stat_summary(geom='errorbar',fun.min = bottom,fun.max = top,position=position_dodge(0.9),width=0.2)+
scale_y_continuous(expand=expansion(mult = c(0.01)))+
theme_bw()+
theme(panel.grid=element_blank())+
labs(x="Group",y="Proportion")+
geom_point(data=dose_epi,aes(group,Freq),size=1,pch=19)+
theme(
axis.text.x.bottom=element_text(hjust=1,vjust=1,angle=45)
)+ggtitle("Epithelial")
3.聚類圖上加標簽
加載數(shù)據(jù)
##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())
setwd("C:/Users/86269/Desktop/shun.C/single_cell")
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
library(harmony)
library(cowplot)
load("scRNA1.Rdata")
將聚類結果與降維結果整合
x=as.data.frame(scRNA1@active.ident)
df<-scRNA1@reductions$tsne@cell.embeddings
df <- cbind(df,x)
colnames(df)<-c("x","y","ident")
初步畫出聚類圖
p<- ggplot(df,aes(x=x,y=y))+geom_point(aes(colour=ident),size=1)+
labs(x="t-SNE 1",y="t-SNE 2")
p
刪去分組信息
p1 <- p+NoLegend()
p1
#加上metadata中的其他信息
meta=scRNA1@meta.data
meta=meta[,c(1:6)]
df.m=merge(df,meta,by=0)
在圖中加上celltype信息
df.m <- df.m %>% group_by(celltype) %>%summarise(x=median(x),y=median(y))
ggplot(df,aes(x,y))+
geom_point(aes(colour=ident))+
ggrepel::geom_text_repel(aes(label=celltype),
data=df.m,
size=5,
label.size=1,
segment.color=NA)+
theme(legend.position = "none")+theme_bw()
在圖中加上cluster信息
df.m <- df.m %>% group_by(seurat_cluster) %>%summarise(x=median(x),y=median(y))
ggplot(df,aes(x,y))+
geom_point(aes(colour=ident))+
ggrepel::geom_text_repel(aes(label=seurat_clusters),
data=df.m,
size=5,
label.size=1,
segment.color=NA)+
theme(legend.position = "none")+theme_bw()
4.在聚類圖上畫橢圓線(暫時沒搞懂)
以p為例
obs <- p$data[,c("x","y","ident")]
ellipse_pro <- 0.98
theta <- c(seq(-pi,pi,length=50),seq(pi,-pi,length=50))
circle <- cbind(cos(theta),sin(theta))
ell <- ddply(obs,'ident',function(x){
if(nrow(x)<=2){
return(NULL)
}
sigma<-var(cbind(x$x,x$y))#求方差
mu<-c(mean(x$x),mean(x$y))#求平均數(shù)
ed <-sqrt(qchisq(ellipse_pro,df=2))
data.frame(sweep(circle %*% chol(sigma)*ed,2,mu,FUN="+"))#chol是求一個正定矩陣的上三角矩陣,%*%是矩陣相乘
})
names(ell)[2:3]<- c('x','y')
ell <-ddply(ell,.(ident),function(x)x[chull(x$x,x$y),])
p1 <- p+geom_polygon(data=ell,aes(group=ident),
colour= 'black',
alpha=1,fill=NA,
linetype="dashed")
p1
5.在聚類圖中將某些點進行highlight
取某些細胞在聚類圖中用不同的顏色標注
#以scRNA1前100個細胞為例
mutlist <- colnames(scRNA1)[1:100]
DimPlot(scRNA1,reduction="tsne",
cells.highlight = mutlist,
cols.highlight='#11AA4D',
sizes.highlight=1.5,
group.by='orig.ident')
6.3d PCA聚類圖
加載數(shù)據(jù)與package
##系統(tǒng)報錯改為英文
Sys.setenv(LANGUAGE = "en")
##禁止轉化為因子
options(stringsAsFactors = FALSE)
##清空環(huán)境
rm(list=ls())
library(Seurat)
library(scatterplot3d)
setwd("C:/Users/86269/Desktop/shun.C/single_cell")
load("sc.combined.rdata")
數(shù)據(jù)預處理
#提取三種不同類型的細胞
sc.1=subset(sc.combined,idents = c("Epithelial","Endothelial","Stem_cell"))
#將分組依據(jù)換為sample
Idents(sc.1)="orig.ident"
#用取平均值的方式將每個sample的每種細胞合并成一個表達矩陣(將每個sample的每個細胞種類的所有細胞的基因表達量加和遵蚜,再除以細胞數(shù))
x=AverageExpression(sc.1 ,add.ident ="new.celltype.2")
x=x$RNA
進行PCA降維
#PCA降維
pca <- prcomp(t(x))
#提取每個細胞類型的降維結果
pca.data=pca$x
#提取畫圖需要的PC1,PC2,PC3
pca.data=as.data.frame(pca.data)
pca.data=pca.data[,1:3]
s1 <- strsplit(rownames(pca.data),split = "_")
s2 <- sapply(s1,function(x){x[1]})
s3 <- sapply(s1,function(x){x[3]})
pca.data$Type <- s2
pca.data$celltype = s3
可視化
###不同type用形狀區(qū)分 不同celltype用顏色區(qū)分
Type.p=c(rep(11,9),rep(16,9),rep(17,9))
cell.type.p = c(rep(c("blue","red","orange"),9 ))
colors.lib <- c("blue","red","orange")
shapes.lib = c(11,16,17)
###畫圖
source('huage.3D.plot.R')
s3d <- scatterplot3d(pca.data[,c("PC1","PC2","PC3")],
pch = Type.p,color = cell.type.p,
cex.symbols = 1,grid=FALSE, box=FALSE,
main = "3D PCA plot")
#加入celltype信息
legend("topright",
c("Epithelial","Endothelial","Stem_cell"),
fill=c('blue',"red","orange"),
box.col=NA,
cex=0.6)
#加入type信息
legend("topleft",
c('GF','SPF','FMT'),
pch = shapes.lib,
box.col=NA,
cex =0.6,
y.intersp = 0.5)
#加上平面
addgrids3d(pca.data[,c("PC1","PC2","PC3")], grid = c("xy", "xz", "yz"))
從圖中可看出不同分組里哪些細胞類型差異較大帖池,分散越明顯差異越大