劉小澤寫于19.10.24
筆記目的:根據生信技能樹的單細胞轉錄組課程探索Smartseq2技術及發(fā)育相關的分析
課程鏈接在:http://jm.grazy.cn/index/mulitcourse/detail.html?cid=55
這次會介紹如何對一些標記基因進行可視化鉴吹。對應視頻第三單元7-8講
前言
將對應文章這張圖(不過H這張圖使用的是差異基因,是下一篇的內容丑蛤;這里先用marker基因嘗試一下):
會根據之前的6個發(fā)育時期和4個cluster的tSNE結果,進行一些marker基因的等高線圖和熱圖可視化
另外還有小提琴圖:
對marker基因可視化的目的還是說明之前分的群是有道理的,文章中也提到了:
1 首先還是使用包裝好的代碼進行可視化
1.1 加載表達矩陣、獲得cluster信息
rm(list = ls())
options(warn=-1)
options(stringsAsFactors = F)
source("../analysis_functions.R")
load('../female_rpkm.Rdata')
# 加載之前HCPC分群結果
cluster <- read.csv('female_clustering.csv')
female_clustering=cluster[,2];names(female_clustering)=cluster[,1]
> table(female_clustering)
female_clustering
C1 C2 C3 C4
240 90 190 43
1.2 拿到文章中的marker基因列表
作者要對哪些基因可視化杖挣,都是有自己的思量的
# 作者選擇的14個marker基因
markerGenes <- c(
"Nr2f1",
"Nr2f2",
"Maf",
"Foxl2",
"Rspo1",
"Lgr5",
"Bmp2",
"Runx1",
"Amhr2",
"Kitl",
"Fst",
"Esr2",
"Amh",
"Ptges"
)
1.3 提取marker基因小表達矩陣
gene_subset <- as.matrix(log(females[rownames(females) %in% markerGenes,]+1))
> dim(gene_subset)
[1] 14 563
> gene_subset[1:4,1:4]
E10.5_XX_20140505_C01_150331_1 E10.5_XX_20140505_C02_150331_1 E10.5_XX_20140505_C03_150331_1 E10.5_XX_20140505_C04_150331_2
Kitl 2.547141 1.172887 4.1123988 4.032277
Lgr5 0.000000 0.000000 0.0568497 0.000000
Esr2 0.000000 0.000000 0.0000000 0.000000
Fst 0.000000 0.000000 0.0000000 2.694897
## 然后對這個小表達矩陣再細分,根據4個cluster的列名刚陡,也即是前面female_clustering=cluster[,2];names(female_clustering)=cluster[,1]這一步的目的
cl1_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C1"])]
cl2_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C2"])]
cl3_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C3"])]
cl4_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C4"])]
## 重新再組合起來
heatmap_gene_subset <- cbind(
cl1_gene_subset,
cl2_gene_subset,
cl3_gene_subset,
cl4_gene_subset
)
1.4 根據marker基因的順序惩妇,重新排列這個矩陣
# 之前是這樣
> rownames(heatmap_gene_subset);markerGenes
[1] "Kitl" "Lgr5" "Esr2" "Fst" "Runx1" "Amhr2" "Bmp2" "Rspo1" "Nr2f2" "Amh" "Foxl2" "Ptges" "Maf" "Nr2f1"
[1] "Nr2f1" "Nr2f2" "Maf" "Foxl2" "Rspo1" "Lgr5" "Bmp2" "Runx1" "Amhr2" "Kitl" "Fst" "Esr2" "Amh" "Ptges"
# 得到marker基因在heatmap_gene_subset中的位置
match(markerGenes,rownames(heatmap_gene_subset))
# 然后就能提取出和marker基因順序一樣的heatmap_gene_subset
heatmap_gene_subset <- heatmap_gene_subset[match(markerGenes,rownames(heatmap_gene_subset)),]
# 之后是這樣
> rownames(heatmap_gene_subset);markerGenes
[1] "Nr2f1" "Nr2f2" "Maf" "Foxl2" "Rspo1" "Lgr5" "Bmp2" "Runx1" "Amhr2" "Kitl" "Fst" "Esr2" "Amh" "Ptges"
[1] "Nr2f1" "Nr2f2" "Maf" "Foxl2" "Rspo1" "Lgr5" "Bmp2" "Runx1" "Amhr2" "Kitl" "Fst" "Esr2" "Amh" "Ptges"
1.5 修改表達矩陣的列名,得到6個時間點信息
heatmap_female_stages <- sapply(strsplit(colnames(heatmap_gene_subset), "_"), `[`, 1)
> table(heatmap_female_stages)
heatmap_female_stages
E10.5 E11.5 E12.5 E13.5 E16.5 P6
68 100 103 99 85 108
1.6 用包裝好的pheatmap畫熱圖
# 看到H圖中筐乳,列被分成了4欄歌殃,那么這個就是根據colbreaks來劃分的。colbreaks的意思就是從哪里到哪里這是一塊蝙云。當有多個分組又想畫一個分割線的話氓皱,這個參數就很有用
colbreaks <- c(
ncol(cl1_gene_subset),
ncol(cl1_gene_subset)+ncol(cl2_gene_subset),
ncol(cl1_gene_subset)+ncol(cl2_gene_subset)+ncol(cl3_gene_subset)
)
# 然后就是上色,6個時間點和4個群使用自定義的顏色
cluster_color <- c(
C1="#560047",
C2="#a53bad",
C3="#eb6bac",
C4="#ffa8a0"
)
stage_color=c(
E10.5="#2754b5",
E11.5="#8a00b0",
E12.5="#d20e0f",
E13.5="#f77f05",
E16.5="#f9db21",
P6="#43f14b"
)
# 開始畫熱圖
library(pheatmap)
png("female_marker_heatmap.png")
plot_heatmap_2(
heatmap_gene_subset,
female_clustering,
heatmap_female_stages,
rowbreaks,
colbreaks,
cluster_color,
stage_color
)
dev.off()
1.7 用包裝好的ggboxplot畫小提琴圖
pdf("step2.1-B-markers-violin.pdf", width=10, height=22)
require(gridExtra)
# 每個基因的小提琴圖都有4個cluster,對它們用不同的顏色
female_clusterPalette <- c(
"#560047",
"#a53bad",
"#eb6bac",
"#ffa8a0"
)
# 每個基因做一個小提琴圖波材,并用for循環(huán)保存在p這個列表中
p <- list()
for (genes in markerGenes) {
p[[genes]] <- violin_gene_exp(
genes,
females,
female_clustering,
female_clusterPalette
)
}
# 最后組合起來股淡,每列顯示3張
do.call(grid.arrange,c(p, ncol=3))
dev.off()
其中這個violin_gene_exp
函數是精髓,如果要看它做了什么廷区,可以按住cmd或ctrl唯灵,然后點一下這個函數,就會跳到自定義函數腳本中
1.8 用包裝好的geom_point
+geom_density_2d
畫等高線圖
pdf("step2.1-C-markers-tSNE-density.pdf", width=16, height=28)
require(gridExtra)
load('../step1-female-RPKM-tSNE/female_tsne.Rdata')
p <- list()
for (genes in markerGenes) {
p[[genes]] <- tsne_gene_exp(
female_tsne,
genes,
females
)
}
do.call(grid.arrange,c(p, ncol=4))
dev.off()
2 使用Seurat包帶的函數進行可視化
上一次已經做好了Seurat的tSNE分群結果隙轻,直接加載
load('seurat3-female-tsne.Rdata')
DimPlot(object = sce_female_tsne, reduction = "tsne")
然后畫小提琴圖和表達量熱圖
# 小提琴圖
pdf('seurat3_VlnPlot.pdf', width=10, height=15)
VlnPlot(object = sce_female_tsne, features = markerGenes ,
pt.size = 0.2,ncol = 4)
dev.off()
# 基因表達量熱圖
pdf('seurat3_FeaturePlot.pdf', width=10, height=15)
FeaturePlot(object = sce_female_tsne, features = markerGenes ,
pt.size = 0.2,ncol = 3)
dev.off()
比較作者代碼和Seurat的結果
取同一個基因Nr2f2埠帕,看看它們的小提琴圖:
然后如果我們自己畫圖呢?
# 就畫其中的Nr2f2基因
## 分類信息在此
group <- Seurat::Idents(sce_female)
## 表達矩陣在此
nr2f2 <- as.numeric(log(female_count['Nr2f2',]+1))
boxplot(nr2f2~group)
ggboxplot畫一個箱線圖并加上顯著性
df <- data.frame(expr=nr2f2,
group=group)
female_clusterPalette <- c(
"#560047",
"#a53bad",
"#eb6bac",
"#ffa8a0"
)
my_comparisons <- list( c("0", "1"), c("1", "2"), c("2", "3") )
ggboxplot(df, x = "group", y = "expr",
color = "group", palette = female_clusterPalette)+
stat_compare_means(comparisons = my_comparisons)
ggviolin再畫一個小提琴圖
df <- data.frame(expr=nr2f2,
group=group)
female_clusterPalette <- c(
"#560047",
"#a53bad",
"#eb6bac",
"#ffa8a0"
)
ggviolin(df, "group", "expr", fill = "group",
palette = female_clusterPalette,
add = "boxplot", add.params = list(fill = "white"))+
stat_compare_means(comparisons = my_comparisons)