寫在前面
之前曾抱怨過complexheatmap太復(fù)雜了,現(xiàn)在我想說是我太菜了浮禾。折騰了一圈complexheatmap后,發(fā)現(xiàn)complexheatmap真的是極致全面的一個熱圖包了份汗。
TidyTuesday
如果你想入門R或者python盈电,但是沒有數(shù)據(jù)給你折騰而發(fā)愁。那么我推薦你去看看TidyTueday杯活。點這里
每周二一個新的數(shù)據(jù)包匆帚,覆蓋各種類型。比如今天我們折騰的數(shù)據(jù)就是來自TidyTuesday旁钧。想要獲取數(shù)據(jù)非常簡單吸重,只需要安裝TidyTuesday的包。
install.packages("tidytuesdayR")
首先讀入需要的包歪今。
#Loading necessary packages
library(tidytuesdayR)
library(tidyverse)
library(DataExplorer) # easy way for data exploring
然后
# Import data from tidytuesday
tuesdata <- tidytuesdayR::tt_load('2020-09-01')
# or
tuesdata <- tidytuesdayR::tt_load(2020, week = 36)
--- Compiling #TidyTuesday Information for 2020-09-01 ----
--- There are 5 files available ---
--- Starting Download ---
Downloading file 1 of 5: `arable_land_pin.csv`
Downloading file 2 of 5: `cereal_crop_yield_vs_fertilizer_application.csv`
Downloading file 3 of 5: `cereal_yields_vs_tractor_inputs_in_agriculture.csv`
Downloading file 4 of 5: `key_crop_yields.csv`
Downloading file 5 of 5: `land_use_vs_yield_change_in_cereal_production.csv`
--- Download complete ---
當(dāng)然嚎幸,具體請考慮自己的網(wǎng)速。因為數(shù)據(jù)保存在github上……
數(shù)據(jù)可視化
key_crop_yields <- tuesdata$key_crop_yields
key_crop_yields %>%
filter(Code != ""& Entity != "World") %>%
select(-(2)) %>%
create_report(
config = configure_report(
add_plot_bar = FALSE
)
)
這次使用DataExplorer自動生成報告寄猩,當(dāng)然你可以通過函數(shù)查看更具體的內(nèi)容嫉晶。
key_crop_yields %>%
filter(
Entity %in% c(
"Japan",
"China",
"United States",
"Brazil",
"Indonesia",
"India",
"Pakistan",
"Nigeria",
"Bangladesh",
"Russia",
"Mexico"
)
) %>%
select(-(2)) %>%
mutate(Year = as.factor(Year),
Entity = as.factor(Entity)) %>% plot_boxplot(by = c("Year"))
可可的產(chǎn)量很有意思啊。
或者
key_crop_yields %>%
drop_na() %>%
filter(Code != "" & Entity != "World" & Year == 2018) %>%
select(-(1:3)) %>%
plot_correlation(type = "c")
當(dāng)然焦影,也能用ggplot2
key_crop_yields %>%
filter(
Entity %in% c(
"Japan",
"China",
"United States",
"Brazil",
"Indonesia",
"India",
"Pakistan",
"Nigeria",
"Bangladesh",
"Russia",
"Mexico"
)
) %>%
ggplot(., aes(x = Year, y = `Wheat (tonnes per hectare)`, color = Entity)) +
geom_point(size = .1) +
geom_smooth(method = 'lm') +
theme_bw()
尼日利亞的小麥產(chǎn)量居然是下降的车遂,聽說他們主糧是一種豆子。中國小麥公頃產(chǎn)增長超級快斯辰,那么中國小麥公頃產(chǎn)達到世界前列了么舶担?一張熱圖告訴你。
注:這里選擇的是人口前11的國家彬呻。
世界小麥公頃產(chǎn)量熱圖
hmmm 看來不是…… 什么你問熱圖怎么做的衣陶?complexheatmap 氨濉!
代碼具體參考kevinblighe/E-MTAB-6141: Data from Lewis, Barnes, Blighe et al., Cell Rep. 2019 Aug 27; 28(9): 2455–2470.e5. (github.com)
#載入包
require(RColorBrewer)
require(ComplexHeatmap)
require(circlize)
require(cluster)
require(countrycode)
然后整理一下數(shù)據(jù)
set.seed(2021)
yield_heatmap <- key_crop_yields %>%
filter(!is.na(Code)) %>%
select(Entity, Year, `Wheat (tonnes per hectare)`) %>%
spread(Year, `Wheat (tonnes per hectare)`) %>%
filter(Entity != "World") %>%
column_to_rownames(var = "Entity") %>%
drop_na()
key_crop_yields %>%
filter(str_detect(Entity,"Sudan")) %>%
select(Entity, Year, `Wheat (tonnes per hectare)`) %>%
drop_na() %>%
mutate(Entity = "Sudan") %>%
arrange(Year) %>%
spread(Year, `Wheat (tonnes per hectare)`) %>%
column_to_rownames(var = "Entity") %>%
bind_rows(yield_heatmap) -> heat
上面做了兩件事剪况,一個是把數(shù)據(jù)里各種大洲和世界數(shù)據(jù)去掉了教沾,另一個就是把蘇丹數(shù)據(jù)跟前蘇丹的數(shù)據(jù)合并了。
注:這里指的是Entity不是Country译断,防杠還是去掉了一些有爭議的地區(qū)授翻。
然后joint一下地理信息,后來發(fā)現(xiàn)起始直接通過Code就能獲得準(zhǔn)確信息了……以后說吧孙咪。
rownames(heat) %>% data.frame(country = .) -> df
df$Continent <- countrycode(sourcevar = df[, "country"],
origin = "country.name",
destination = "continent")
接下來是定義五大洲板塊的色塊注釋堪唐。
ann <- data.frame(Continent = df$Continent,
row.names = df$country,
stringsAsFactors = FALSE)
colours <- list(
Continent = c('Africa' = '#2171B5', 'Asia' = '#6A51A3', 'Europe' = 'red2', 'Americas' = 'green3', 'Oceania' = 'orange')
)
colAnn <- HeatmapAnnotation(
df = ann,
which = 'row', # 'col' (samples) or 'row' (gene) annotation?
col = colours,
annotation_height = 0.6,
annotation_width = unit(1, 'cm'),
gap = unit(1, 'mm'),
annotation_legend_param = list(
Continent = list(
nrow = 5,
title = 'Continent',
title_position = 'topcenter',
legend_direction = 'vertical',
title_gp = gpar(fontsize = 12, fontface = 'bold'),
labels_gp = gpar(fontsize = 12, fontface = 'bold'))
)
)
頂部的箱線圖注釋
pick.col <- brewer.pal(5, 'Greens')
boxplotCol <- HeatmapAnnotation(
Average = anno_boxplot(
heat,
border = FALSE,
gp = gpar(fill = colorRampPalette(pick.col)(length(1:58))),
pch = '.',
size = unit(2, 'mm'),
axis = TRUE,
axis_param = list(
gp = gpar(fontsize = 12),
side = 'left')),
annotation_width = unit(c(2.0), 'cm'),
which = 'col')
然后是頂部線圖的注釋
heat_anno <- HeatmapAnnotation(
Average = anno_lines(
apply(heat, 2, mean),
border = TRUE,
gp = gpar(col = "red2"),
size = unit(2, 'mm'),
axis = TRUE,
axis_param = list(
gp = gpar(fontsize = 10),
side = 'left')),
Distribution = anno_boxplot(
heat,
border = FALSE,
gp = gpar(fill = colorRampPalette(pick.col)(length(1:58))),
pch = '.',
size = unit(2, 'mm'),
axis = TRUE,
axis_param = list(
gp = gpar(fontsize = 12),
side = 'left')),
annotation_width = unit(c(2.0), 'cm'),
which = 'col')
當(dāng)然你也可以換成柱狀圖
barplotCol <- HeatmapAnnotation(
Average = anno_barplot(
apply(heat, 2, mean),
border = TRUE,
gp = gpar(fill = colorRampPalette(pick.col)(length(1:58))),
size = unit(2, 'mm'),
axis = TRUE,
axis_param = list(
gp = gpar(fontsize = 12),
side = 'left')),
annotation_width = unit(c(2.0), 'cm'),
which = 'col')
然后開始畫圖熱圖吧
說個題外話,如果你對魔改complexheatmap有興趣翎蹈,強烈建議看看這兒
ComplexHeatmap Complete Reference (jokergoo.github.io)
雖然看的人想喊師傅別念了別念了……
myCol <- colorRampPalette(c('dodgerblue', 'black', 'yellow'))(100) #三色
myBreaks <- seq(1, 8, length.out = 100) #設(shè)置色彩范圍
pamClusters <- cluster::pam(heat, k = 5) #設(shè)置聚類數(shù)
Heatmap(
heat,
name = 'Wheat
(tonnes per hectare)',
col = colorRamp2(myBreaks, myCol), #這個顏色設(shè)定值得學(xué)習(xí)淮菠,做出來很高級
row_split = factor(pamClusters$clustering, levels = c(5,4,3,1,2)), #聚類排序,
cluster_row_slices = FALSE, #不解釋荤堪,自己看manual
heatmap_legend_param = list( #設(shè)置標(biāo)簽
color_bar = 'continuous',
legend_direction = 'vertical',
legend_width = unit(8, 'cm'),
legend_height = unit(5.0, 'cm'),
title_position = 'topcenter',
title_gp = gpar(fontsize = 12, fontface = 'bold'),
labels_gp = gpar(fontsize = 12, fontface = 'bold')
),
# row parameters
cluster_rows = TRUE, #聚類
show_row_dend = TRUE, #顯示聚類
row_title = "Cluster",
row_title_gp = gpar(fontsize = 10, fontface = 'bold'), #字號合陵,粗體
row_title_rot = 90,
show_row_names = TRUE,
row_names_gp = gpar(fontsize = 10, fontface = 'bold'),
row_names_side = 'right',
row_dend_width = unit(25, 'mm'),
# column parameters
cluster_columns = FALSE,
column_title = 'Year',
column_title_side = 'bottom',
column_title_gp = gpar(fontsize = 10, fontface = 'bold'),
column_title_rot = 0,
show_column_names = TRUE,
column_names_gp = gpar(fontsize = 10, fontface = 'bold'),
column_names_max_height = unit(10, 'cm'),
column_dend_height = unit(25, 'mm'),
# cluster methods for rows and columns
clustering_distance_rows = function(m) dist(m),
row_dend_reorder = T,
clustering_method_rows = 'ward.D2',
top_annotation = heat_anno,
right_annotation = colAnn
) -> heat_map
heat_map
是不是很簡單呢!做完我都不記得那些參數(shù)是干嘛的了……
那么有沒有簡單的辦法呢澄阳!有個低配版拥知,效果嘛……
yield_mean <- apply(heat, 2, mean) %>% data.frame(average = .) %>% mutate(year = rownames(.))
heat %>%
mutate(country = rownames(.)) %>%
left_join(., df, by = "country") %>%
gather(year, yield, -country, -Continent ) %>%
left_join(., yield_mean, by = "year") %>%
as.tibble() %>%
tidyHeatmap::heatmap(country, year, yield,
cluster_columns = FALSE,
.scale = "none",
transform = NULL) %>%
tidyHeatmap::add_tile(Continent) %>%
tidyHeatmap::add_bar(average)
怎么說呢,又不是不能用寇荧!