一. 示例數據準備
下載:鏈接:https://pan.baidu.com/s/1_b8swSkWDqIHZi6UwKaspA
提取碼:pll7
文件說明
示例數據,其中數據均為虛擬數據,與實際生物學過程無關
文件名:dataset_heatmap.txt
列分別為基因,cell1的5個重復樣本,cell2的5個重復樣本
行代表每個(gè)基因在所有樣本的FPKM值
二. 環境需求
Rstudio:
如果系統中沒有 Rstudio,先下載安裝:https://www.rstudio.com/products/rstudio/download/#download
pheatmap 包:
如果沒有安裝該R包,執行以下代碼:
# Install pheatmap from CRAN only when it is not already available.
# Every plotting snippet in this tutorial calls library(pheatmap); the
# Bioconductor package "heatmaps" is a different package and does not
# provide pheatmap().
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
三. 繪制聚類熱圖
1. 聚類熱圖繪制
# Setup =========================================
# Start from a clean workspace (interactive tutorial use only).
rm(list = ls())
# Load the plotting package.
library(pheatmap)
# Set the working directory so the relative path below resolves.
setwd("E:/R/WorkSpace/baimoc/visualization")
# Prepare the dataset ===========================
# Read the table: the first line is the header, the first column gives row names.
dataset <- read.table("resource/dataset_heatmap.txt", header = TRUE, row.names = 1)
# Take the first 60 rows and first 10 columns of the expression matrix for plotting.
exp_ds <- dataset[1:60, 1:10]
# Build the sample annotation: one label per column of exp_ds.
cell_list <- c(rep("cell_1", 5), rep("cell_2", 5))
annotation_c <- data.frame(cell_list)
rownames(annotation_c) <- colnames(exp_ds)
# Draw the heatmap ==============================
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
pheatmap(
  exp_ds, # expression data
  cluster_rows = TRUE, # cluster rows
  cluster_cols = TRUE, # cluster columns
  annotation_col = annotation_c, # sample annotation
  annotation_legend = TRUE, # show the annotation legend
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100) # base colour palette
)
2. 無分類信息熱圖
# Replace the heatmap-drawing part with the code below.
# Draw the heatmap ==============================
# No annotation_col is passed, so no sample annotation is supplied to the plot.
pheatmap(
  exp_ds, # expression data
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100) # base colour palette
)
3. 無聚類熱圖
# Replace the heatmap-drawing part with the code below.
# Clustering is switched off for both rows and columns.
pheatmap(
  exp_ds, # expression data
  cluster_rows = FALSE, # do not cluster rows
  cluster_cols = FALSE, # do not cluster columns
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100) # base colour palette
)
4. 分割聚類樹熱圖
# Draw the heatmap ==============================
# cutree_cols / cutree_rows give the number of clusters the column / row
# dendrogram is cut into.
pheatmap(
  exp_ds, # expression data
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100), # base colour palette
  cutree_cols = 2, # cut the column tree into 2 clusters
  cutree_rows = 20 # cut the row tree into 20 clusters
)
5. 多分組聚類熱圖
# Start from a clean workspace (interactive tutorial use only).
rm(list = ls())
# Load the plotting package.
library(pheatmap)
# Set the working directory so the relative path below resolves.
setwd("E:/R/WorkSpace/baimoc/visualization")
# Prepare the dataset ===========================
# The relative path "resource/dataset_heatmap.txt" resolves to
# E:/R/WorkSpace/baimoc/visualization/resource/dataset_heatmap.txt
dataset <- read.table("resource/dataset_heatmap.txt", header = TRUE, row.names = 1)
# Take the first 60 rows and first 10 columns of the expression matrix for plotting.
exp_ds <- dataset[1:60, 1:10]
# Build the column (sample) annotations: one row per column of exp_ds.
# Each data-frame column becomes one annotation track, labelled by its name.
cell_type <- c(rep("cell_1", 5), rep("cell_2", 5))
sample_class <- c(rep("normal", 5), rep("cancer", 5))
sample_type <- c(rep("control", 5), rep("case", 5))
level <- 1:10
annotation_c <- data.frame(cell_type, sample_class, sample_type, level)
rownames(annotation_c) <- colnames(exp_ds)
# Build the row (gene) annotations: one row per row of exp_ds.
gene_class <- c(rep("good", 30), rep("bad", 30))
gene_type <- c(rep("fat", 20), rep("blood", 20), rep("Immunology", 20))
annotation_r <- data.frame(gene_class, gene_type)
rownames(annotation_r) <- rownames(exp_ds)
# Draw the heatmap ==============================
pheatmap(
  exp_ds, # expression data
  cluster_rows = TRUE, # cluster rows
  cluster_cols = TRUE, # cluster columns
  annotation_col = annotation_c, # sample (column) annotations
  annotation_row = annotation_r, # gene (row) annotations
  annotation_legend = TRUE, # show the annotation legend
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100) # base colour palette
)
6. 分組調色
# Start from a clean workspace (interactive tutorial use only).
rm(list = ls())
# Load the plotting package.
library(pheatmap)
# Set the working directory so the relative path below resolves.
setwd("E:/R/WorkSpace/baimoc/visualization")
# Prepare the dataset ===========================
# The relative path "resource/dataset_heatmap.txt" resolves to
# E:/R/WorkSpace/baimoc/visualization/resource/dataset_heatmap.txt
dataset <- read.table("resource/dataset_heatmap.txt", header = TRUE, row.names = 1)
# Take the first 60 rows and first 10 columns of the expression matrix for plotting.
exp_ds <- dataset[1:60, 1:10]
# Build the column (sample) annotation: one row per column of exp_ds.
sample_class <- c(rep("Normal", 5), rep("Cancer", 5))
annotation_c <- data.frame(sample_class)
rownames(annotation_c) <- colnames(exp_ds)
# Build the row (gene) annotation: one row per row of exp_ds.
gene_type <- c(rep("Fat", 20), rep("Blood", 20), rep("Immunology", 20))
annotation_r <- data.frame(gene_type)
rownames(annotation_r) <- rownames(exp_ds)
# Colours per annotation level. The list names must match the annotation
# column names, and the inner names must match the annotation values.
annotation_colors <- list(
  sample_class = c(Normal = "#F8EFBA", Cancer = "#FD7272"),
  gene_type = c(Fat = "#f1f2f6", Blood = "#ced6e0", Immunology = "#57606f")
)
# Draw the heatmap ==============================
pheatmap(
  exp_ds, # expression data
  cluster_rows = TRUE, # cluster rows
  cluster_cols = TRUE, # cluster columns
  annotation_col = annotation_c, # sample (column) annotation
  annotation_row = annotation_r, # gene (row) annotation
  annotation_colors = annotation_colors, # custom annotation colours
  annotation_legend = TRUE, # show the annotation legend
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100) # base colour palette
)
7. 顯示文本
# Draw the heatmap ==============================
# Print the cell values on top of the heatmap.
pheatmap(
  exp_ds, # expression data
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100), # base colour palette
  display_numbers = TRUE, # show the numeric values in the cells
  fontsize_number = 8, # font size of the numbers
  number_color = "#4a4a4a", # colour of the numbers
  number_format = "%.2f" # display format of the numbers (two decimals)
)
8. 去除描邊
# Heatmap without cell borders.
pheatmap(
  exp_ds, # expression data
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100), # base colour palette
  border_color = NA # logical NA (not the string "NA") means no border is drawn
)
9. 字體相關
# Font-related settings.
# Note: gaps_row expects a vector of row indices (e.g. c(20, 40)) and only
# applies when cluster_rows = FALSE, so it is not set in this clustered example.
pheatmap(
  exp_ds, # expression data
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100), # base colour palette
  fontsize = 10, # global font size; overridden by the more specific settings below
  fontsize_row = 8, # font size of the row names
  fontsize_col = 12, # font size of the column names
  angle_col = 45 # rotation angle of the column labels; one of 270, 0, 45, 90, 315
)
10. 調整聚類樹高
# Adjust the height of the row and column dendrograms.
pheatmap(
  exp_ds, # expression data
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100), # base colour palette
  treeheight_row = 50, # height of the row dendrogram
  treeheight_col = 30 # height of the column dendrogram
)
11. 聚類方法選擇
# Choose the distance measure and the clustering method.
pheatmap(
  exp_ds, # expression data
  show_rownames = TRUE, # show row names
  show_colnames = TRUE, # show column names
  scale = "row", # scale (standardize) each row
  color = colorRampPalette(c("#8854d0", "#ffffff", "#fa8231"))(100), # base colour palette
  # Distance measure used when clustering rows; options: "correlation",
  # "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski".
  clustering_distance_rows = "euclidean",
  # Clustering method; options: "ward", "ward.D", "ward.D2", "single",
  # "complete", "average", "mcquitty", "median" or "centroid".
  clustering_method = "complete"
)
四. 保存為圖片
-
這里可導出像素圖和PDF,也可拷貝到PS調整
選擇合適的文件格式,調整合適長寬,印刷或投稿選PDF,TIFF,EPS就好
-
文件默認存儲在剛剛設置的工作目錄里
五. 詳細參數設置說明
1. 設置工作目錄
# Set the working directory; relative file paths are resolved against it.
setwd("E:/R/WorkSpace/baimoc/visualization")
在R的執行過程中,為了方便,需要指定一個獲取文件和輸出文件所在的目錄,這樣就不需要每次設置全路徑,只需要指定相對目錄
setwd("E:/R/WorkSpace/baimoc/visualization")
的意思就是設置工作目錄為E:/R/WorkSpace/baimoc/visualization
2. 載入數據
# Read the table: header = TRUE takes column names from the first line,
# row.names = 1 uses the first column as row names.
dataset <- read.table(
  "resource/dataset_heatmap.txt",
  header = TRUE,
  row.names = 1
)
因為工作目錄已經設置,如果要獲取E:/R/WorkSpace/baimoc/visualization/resource/dataset_heatmap.txt
文件,那么就只需要設置相對路徑resource/dataset_heatmap.txt
對于header = TRUE, row.names = 1
代表讀取文件表頭,設置第一列為行名
3. 獲取數據子集
# Take a subset of the expression matrix for plotting:
# the first 60 rows and the first 10 columns.
exp_ds <- dataset[1:60, 1:10]
原始數據:
如果獲取前兩個基因和cell1與cell2的前兩個樣本,只需要執行
# First two genes (rows 1-2) and the first two samples of each cell type.
# Assumes columns 1-5 are the cell1 replicates and 6-10 the cell2 replicates
# (the gene column was consumed as row names by row.names = 1).
exp_ds <- dataset[1:2, c(1:2, 6:7)]
4. 樣本分類數據
# Build the sample annotation: five "cell_1" labels followed by five "cell_2".
cell_list <- c(rep("cell_1", 5), rep("cell_2", 5))
annotation_c <- data.frame(cell_list)
# Use the column names of exp_ds as row names so each label maps to one sample.
rownames(annotation_c) <- colnames(exp_ds)
這段代碼目的是構建分類名與原始數據的列名的對應關系