來自 https://mp.weixin.qq.com/s/9IrY9kajZG2SJcoewUVH8w
-
1 簡介
數據介紹:
該數據是從丁香園·丁香醫生通過爬蟲獲取的全國2019-nCoV病毒的感染病例。
時間的分辨率:1小時
空間分辨率:城市和省份
起止時間:從2020/1/25/17時到疫情結束
-
2 需要的包
devtools::install_github("microly/alimap")
library(alimap) # to get China map at the prefecture city level
library(sf)
library(ggplot2)
library(dplyr)
library(tibble)
library(tidyr)
library(magrittr)
library(purrr)
library(readr)
library(stringr)
library(gganimate)
library(lubridate)
library(Cairo)
library(magick)
-
3 地圖數據
如果有本地數據,可以自行讀取。
因為很多市級地名存在變化,
而且爬取的比較亂,部分沒有“市”這個字,
所以使用前2個漢字進行聯結表。以地圖數據集中的城市名為準。
# Load the prefecture-level city map and add `c2`, the first two characters
# of each city name; `c2` is the key used to join the epidemic data.
Chinamap_cities_sf <- mutate(
  map_prefecture_city(),
  c2 = str_sub(name, 1, 2)
)
-
4 時間序列
每12小時更新1次，從早上9點到晚上9點。
# First snapshot of the series: 2020-01-25 21:00.
startTime <- ymd_h("2020/1/25 21")
# Current time expressed in Asia/Shanghai; only this timezone is supported.
nowTime <- with_tz(Sys.time(), tz = "Asia/Shanghai")
# Pick the last snapshot slot (09:00 or 21:00) for the current hour.
# Hours 0-9 use yesterday 21:00, hours 10-21 use today 09:00, and hours
# 22-23 use today 21:00, so at exactly hour 9 or 21 the earlier slot is used.
endTime <- if (hour(nowTime) <= 9) {
  date(nowTime) - ddays(1) + dhours(21)
} else if (hour(nowTime) <= 21) {
  date(nowTime) + dhours(9)
} else {
  date(nowTime) + dhours(21)
}
# Number of 12-hour steps between the first and the last snapshot.
timeLength <- time_length(interval(startTime, endTime), "hour") / 12
# Snapshot times at 12-hour spacing; the 6th offset is dropped because that
# snapshot returned 404 at the time.
mytime <- startTime + dhours(12 * (0:timeLength))[-6]
mymonth <- month(mytime)
myday <- day(mytime)
# Hour as a two-character, zero-padded string so every stamp has equal length.
myhour <- str_pad(
  as.character(hour(mytime)),
  width = 2, side = "left", pad = "0"
)
# File-name stamps of the form "<date>T<hour>".
myAPI <- paste(date(mytime), myhour, sep = "T")
-
5 疫情數據
通過API接口讀取疫情歷史數據,API接口由網友提供，爬取自丁香園。
# Download one city-level snapshot and keep the city name and confirmed count.
#
# oneAPI: stamp spliced into the CSV file name of the download URL
#   (this script builds stamps of the form "<date>T<hour>").
# Returns the downloaded table reduced to the columns `city` and
# `confirmed_c`.
read_epidemic <- function(oneAPI) {
  snapshot_url <- paste0(
    "http://69.171.70.18:5000/download/city_level_", oneAPI, ".csv"
  )
  snapshot <- read_csv(file = snapshot_url)
  # Replace the header with fixed names; this relies on the file having
  # exactly these 14 columns in this order.
  colnames(snapshot) <- c(
    "x1", "unnamed", "city", "confirmed_c", "suspected_c",
    "cured_c", "dead_c", "province", "short_p", "confirmed_p",
    "suspected_p", "cured_p", "dead_p", "comment"
  )
  select(snapshot, city, confirmed_c)
}
# Fetch every snapshot and stack them into one long table with one row per
# (time, city) pair.
epidemic_nest <- tibble(time = mytime, myAPI = myAPI) %>%
  # One downloaded table per stamp, stored as a list column.
  mutate(data = map(myAPI, read_epidemic)) %>%
  select(-myAPI) %>%
  # Name the list column explicitly: calling unnest() without `cols` is
  # deprecated in tidyr >= 1.0 and emits a warning.
  unnest(cols = data)
-
5.1 分箱
因為很多市級地名存在變化,
而且爬取的比較亂，部分沒有“市”這個字，
所以使用前2個漢字進行聯結表。以地圖數據集中的城市名為準。
# Bin edges for the confirmed counts. Intervals are left-closed
# (`right = FALSE`). The top edge is open-ended so that every count of 5000
# or more lands in the ">=5000" bin; with a finite ceiling, larger counts
# would fall outside all bins and become NA.
mybreaks <- c(0, 1, 10, 50, 100, 500, 1000, 5000, Inf)
mylabels <- c("0", "1-9", "10-49", "50-99", "100-499",
              "500-999", "1000-4999", ">=5000")
epidemic_df <- epidemic_nest %>%
  mutate(
    # Ordered factor of count classes, used as the fill variable of the map.
    conf2 = cut(confirmed_c,
                breaks = mybreaks, labels = mylabels,
                include.lowest = TRUE, right = FALSE,
                ordered_result = TRUE),
    # First two characters of the city name: the key for joining to the map.
    c2 = str_sub(city, 1, 2)
  )
-
6 聯結表及循環繪圖
# Create a temporary directory for the frames. The warning is silenced so a
# rerun in the same session does not complain that the directory exists.
dir.create(dir1 <- file.path(tempdir(), "testdir"), showWarnings = FALSE)
# Render one map per snapshot time and save it as a PNG frame.
# seq_along() keeps the loop from running when `mytime` is empty.
for (i in seq_along(mytime)) {
  # Join this snapshot's epidemic data onto the map by the 2-character key.
  epidemic_time <- epidemic_df %>% filter(time == mytime[i])
  epidemic_city <- Chinamap_cities_sf %>%
    left_join(epidemic_time, by = "c2") %>%
    select(-c2, -city)
  # Cities without a matching record get the "0" class. `conf2` is an ordered
  # factor, so the replacement must be the level label "0"; the number 0 is
  # not one of its levels.
  epidemic_city$conf2[is.na(epidemic_city$conf2)] <- "0"
  # Plot. The label strings are restored to plain Chinese text; the previous
  # ones contained stray "(shù)"-style conversion artifacts.
  gg_epidemic <- ggplot(epidemic_city) +
    geom_sf(aes(fill = conf2)) +
    coord_sf() +
    scale_fill_brewer(palette = "YlOrRd", direction = 1) +
    guides(fill = guide_legend(title = "確診人數", reverse = TRUE)) +
    labs(title = "2019-ncov疫情數據可視化",
         subtitle = mytime[i],
         caption = "數據來源:丁香園·丁香醫生") +
    theme(
      # Titles
      plot.title = element_text(face = "bold", hjust = 0.5,
                                color = "black"),
      plot.subtitle = element_text(face = "bold", hjust = 0.5, size = 20,
                                   color = "red"),
      plot.caption = element_text(face = "bold", hjust = 1,
                                  color = "blue"),
      # Legend
      legend.title = element_text(face = "bold",
                                  color = "black"),
      legend.text = element_text(face = "bold",
                                 color = "black"),
      legend.background = element_rect(colour = "black"),
      legend.key = element_rect(fill = NA), # no background behind legend keys
      legend.position = c(0.85, 0.2),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      # Plot panel
      panel.background = element_blank(),
      panel.border = element_rect(color = "black", linetype = "solid",
                                  size = 1, fill = NA)
    )
  # Save the frame as "<date>_<hour>.png" inside the temporary directory.
  ggsave(filename = paste0(date(mytime[i]), "_", hour(mytime[i]), ".png"),
         plot = gg_epidemic, path = dir1,
         width = 20, height = 20, units = "cm")
}
-
7 動畫
# Directory that receives the finished GIF.
path_pre <- "./"
# Read the saved frames in chronological order and combine them into an
# animation; the paths follow the "<date>_<hour>.png" names of the frames.
animate_epidemic <- image_animate(
  image = image_read(
    path = file.path(dir1, paste0(date(mytime), "_", hour(mytime), ".png"))
  )
)
# The file name is restored to plain Chinese text; the previous one contained
# a stray "(tài)" conversion artifact.
anim_save(filename = "疫情地圖可視化動態圖.gif",
          animation = animate_epidemic, path = path_pre)
# Remove the temporary frame directory. unlink() does not delete directories
# unless recursive = TRUE.
unlink(dir1, recursive = TRUE)
2019-nCoV疫情地圖動態可視化