數(shù)據(jù)主題:Rap Artists
數(shù)據(jù)源:
https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv
這次挑的圖比較簡單钙勃,比較有意思的就是純黑背景
實際就是封裝成一個函數(shù)可以單獨調(diào)用
前置知識
- 黑色主題
黑色主題代碼:Black theme for ggplot2
主題代碼和簡單示例如下:
theme_black = function(base_size = 12, base_family = "") {
theme_grey(base_size = base_size, base_family = base_family) %+replace%
theme(
# Specify axis options
axis.line = element_blank(),
axis.text.x = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),
axis.text.y = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),
axis.ticks = element_line(color = "white", size = 0.2),
axis.title.x = element_text(size = base_size, color = "white", margin = margin(0, 10, 0, 0)),
axis.title.y = element_text(size = base_size, color = "white", angle = 90, margin = margin(0, 10, 0, 0)),
axis.ticks.length = unit(0.3, "lines"),
# Specify legend options
legend.background = element_rect(color = NA, fill = "black"),
legend.key = element_rect(color = "white", fill = "black"),
legend.key.size = unit(1.2, "lines"),
legend.key.height = NULL,
legend.key.width = NULL,
legend.text = element_text(size = base_size*0.8, color = "white"),
legend.title = element_text(size = base_size*0.8, face = "bold", hjust = 0, color = "white"),
legend.position = "right",
legend.text.align = NULL,
legend.title.align = NULL,
legend.direction = "vertical",
legend.box = NULL,
# Specify panel options
panel.background = element_rect(fill = "black", color = NA),
panel.border = element_rect(fill = NA, color = "white"),
panel.grid.major = element_line(color = "grey35"),
panel.grid.minor = element_line(color = "grey20"),
panel.spacing = unit(0.5, "lines"),
# Specify facetting options
strip.background = element_rect(fill = "grey30", color = "grey10"),
strip.text.x = element_text(size = base_size*0.8, color = "white"),
strip.text.y = element_text(size = base_size*0.8, color = "white",angle = -90),
# Specify plot options
plot.background = element_rect(color = "black", fill = "black"),
plot.title = element_text(size = base_size*1.2, color = "white"),
plot.margin = unit(rep(1, 4), "lines")
)
}
library(patchwork)
p1 <- ggplot(mtcars, aes(wt, mpg)) + geom_point()
p2 <- ggplot(mtcars, aes(wt, mpg)) + geom_point(color = "white") + theme_black()
p3 <- iris %>%
ggplot(aes(Sepal.Length,Petal.Length,color=Species)) +
geom_point() +
theme(
legend.position = c(0.85,0.18)
)
p4 <- iris %>%
ggplot(aes(Sepal.Length,Petal.Length,color=Species)) +
geom_point() +
theme_black() +
theme(
legend.position = c(0.85,0.18)
)
(p1 + p3)/(p2 + p4)
- gghighlight
gghighlight()
can highlight almost any geoms.
本質(zhì)就是把感興趣的點highlight
我覺得理解成非感興趣的點unhighlight可能更好
對示例簡單修改后如下:
# 構(gòu)建數(shù)據(jù)
dat <- data.frame(
idx = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
value = c(1, 2, 3, 10, 11, 12, 9, 10, 11),
category = rep(c("apple", "banana", "strawberry"), 3),
stringsAsFactors = FALSE
)
- (1) 各分組最大值符合條件則highlight該分組
p1 <- ggplot(dat, aes(idx, value, colour = category)) +
geom_line() +
gghighlight(max(value) > 10)
可以看到gghighlight會設(shè)置打標(biāo)簽
本質(zhì)就是ggrepel::geom_label_repel()
,所以可以進行個性化調(diào)整
- (2) 分組后按數(shù)據(jù)篩選感興趣的點
ggplot(dat, aes(idx, value)) +
geom_point() +
gghighlight(value > 2, label_key = category)
依然會對highlight的點打上標(biāo)簽
但如果點過多會有如下warning并且不打標(biāo)簽傍妒,上限應(yīng)該是10個
label_key: category
Too many data points, skip labeling
- (2) 可以調(diào)節(jié)非highlight數(shù)據(jù)的參數(shù):
unhighlighted_params
就是標(biāo)識但弱化非感興趣數(shù)據(jù)的存在,舉兩個例子
p1 <- ggplot(dat, aes(idx, value, colour = category)) +
geom_line(size = 4) +
gghighlight(max(value) > 10,
unhighlighted_params = list(size = 2)
)
p2 <- ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(fill=class),shape=21,color='white',size=2.5,stroke=1) +
gghighlight::gghighlight(hwy > 19,
unhighlighted_params = list(size = 1))
p1 + p2
畫圖
- 第一張圖:歌曲年份和排名的散點圖
# 數(shù)據(jù)源
polls <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv')
# 注冊字體
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
RMN=windowsFont("Times New Roman"),
ARL=windowsFont("Arial"),
JBM=windowsFont("JetBrains Mono"))
polls %>%
# 自定義擴大了數(shù)據(jù)范圍利于結(jié)果呈現(xiàn)
mutate(points = case_when(rank == 1 ~ 10,
rank == 2 ~ 8,
rank == 3 ~ 6,
rank == 4 ~ 4,
rank == 5 ~ 2)) %>%
group_by(title, year, gender) %>%
summarise(sum_points = sum(points)) %>%
# arrange(-var) 等價于 arrange(desc(var))
arrange(-sum_points) %>%
ggplot(aes(year, sum_points)) +
geom_point(position = "jitter") +
# 黑色背景下highlight無論是感興趣還是非興趣的數(shù)據(jù)點顏色均會反轉(zhuǎn)成可見
gghighlight::gghighlight(sum_points > 90, label_key = title,
# 這里原作者沒有修改
label_params = list(fill=NA, colour="white",
size = 5, family='ARL',
segment.colour = NA)) +
labs(title = "Top Rated Songs",
x = "Year",
y= "Total Points") +
# 加上黑色主題函數(shù)
theme_black() +
# 如果需要調(diào)整各參數(shù)需要在主題背景之后設(shè)置否則會被覆蓋
theme(
text = element_text(family="ARL"),
plot.title = element_text(size = 20,vjust = 2),
axis.title.x = element_text(size = 16),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 13),
axis.text.y = element_text(size = 13)
)
- 第二張圖:探究歌曲排名與歌手合作的關(guān)系
res <- polls %>%
# 利用正則和文本檢測尋找特定符號 ft 和 & 明確有無合作
mutate(first_artist = gsub( " ft.*$", "", artist ),
first_artist = gsub( " &.*$", "", first_artist ),
collaboration = case_when(str_detect(artist, "ft") ~ "Yes",
str_detect(artist, "&") ~ "Yes",
TRUE ~ "No"),
points = case_when(rank == 1 ~ 10,
rank == 2 ~ 8,
rank == 3 ~ 6,
rank == 4 ~ 4,
rank == 5 ~ 2)) %>%
# 根據(jù)歌手和合作與否分別計算平均歌曲得分
group_by(first_artist, collaboration) %>%
summarise(sum_points = sum(points),
n = n(),
avg_points = sum_points/n) %>%
group_by(first_artist) %>%
# 等價于 filter(n() == 2)恼蓬,篩選出有無合作均發(fā)過歌曲的歌手
filter(n() > 1) %>%
arrange(-avg_points)
res %>%
ggplot(aes(avg_points, first_artist, color = collaboration)) +
geom_line(aes(group = first_artist)) +
geom_point() +
scale_color_manual(values = c("red", "green"), name = "Collaboration?") +
labs(title = "Do artists get higher rankings when they collaborate?",
y = "",
x = "Average Points Rating") +
theme_black() +
theme(
text = element_text(family="ARL"),
plot.title = element_text(size = 17,vjust = 4),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
legend.title = element_text(size = 13),
legend.text = element_text(size = 10)
)