R可視化：基礎(chǔ)圖形可視化之Distribution（三）

數(shù)據(jù)分析的圖形可視化是了解數(shù)據(jù)分布彪标、波動和相關(guān)性等屬性必不可少的手段德频。數(shù)據(jù)分布可視化圖形主要有：小提琴圖、核密度曲線圖、柱狀圖、箱線圖和山脊圖等泉哈。

小提琴圖Violin

# Libraries
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)

# create a dataset
data <- data.frame(
  name=c( rep("A",500), rep("B",500), rep("B",500), rep("C",20), rep('D', 100)  ),
  value=c( rnorm(500, 10, 5), rnorm(500, 13, 1), rnorm(500, 18, 1), rnorm(20, 25, 4), rnorm(100, 12, 1) )
)

# sample size
sample_size = data %>% group_by(name) %>% summarize(num=n())

# Plot
data %>%
  left_join(sample_size) %>%
  mutate(myaxis = paste0(name, "\n", "n=", num)) %>%
  ggplot( aes(x=myaxis, y=value, fill=name)) +
    geom_violin(width=1.4) +
    geom_boxplot(width=0.1, color="grey", alpha=0.2) +
    scale_fill_viridis(discrete = TRUE) +
    theme_ipsum() +
    theme(
      legend.position="none",
      plot.title = element_text(size=11)
    ) +
    ggtitle("A Violin wrapping a boxplot") +
    xlab("")

# Libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(hrbrthemes)
library(viridis)

# Load dataset from github
data <- read.table("dataset/viz/probly.csv", header=TRUE, sep=",")

# Data is at wide format, we need to make it 'tidy' or 'long'
data <- data %>% 
  gather(key="text", value="value") %>%
  mutate(text = gsub("\\.", " ",text)) %>%
  mutate(value = round(as.numeric(value),0)) %>%
  filter(text %in% c("Almost Certainly","Very Good Chance","We Believe","Likely","About Even", "Little Chance", "Chances Are Slight", "Almost No Chance"))

# Plot
p <- data %>%
  mutate(text = fct_reorder(text, value)) %>% # Reorder data
  ggplot( aes(x=text, y=value, fill=text, color=text)) +
    geom_violin(width=2.1, size=0.2) +
    scale_fill_viridis(discrete=TRUE) +
    scale_color_viridis(discrete=TRUE) +
    theme_ipsum() +
    theme(
      legend.position="none"
    ) +
    coord_flip() + # This switch X and Y axis and allows to get the horizontal version
    xlab("") +
    ylab("Assigned Probability (%)")

p

核密度圖 density chart

library(ggplot2)
library(hrbrthemes)
library(dplyr)
library(tidyr)
library(viridis)

data <- read.table("dataset/viz/probly.csv", header=TRUE, sep=",")
data <- data %>%
  gather(key="text", value="value") %>%
  mutate(text = gsub("\\.", " ",text)) %>%
  mutate(value = round(as.numeric(value),0))

# A dataframe for annotations
annot <- data.frame(
  text = c("Almost No Chance", "About Even", "Probable", "Almost Certainly"),
  x = c(5, 53, 65, 79),
  y = c(0.15, 0.4, 0.06, 0.1)
)

# Plot
data %>%
  filter(text %in% c("Almost No Chance", "About Even", "Probable", "Almost Certainly")) %>%
  ggplot( aes(x=value, color=text, fill=text)) +
    geom_density(alpha=0.6) +
    scale_fill_viridis(discrete=TRUE) +
    scale_color_viridis(discrete=TRUE) +
    geom_text( data=annot, aes(x=x, y=y, label=text, color=text), hjust=0, size=4.5) +
    theme_ipsum() +
    theme(
      legend.position="none"
    ) +
    ylab("") +
    xlab("Assigned Probability (%)")

# library
library(ggplot2)
library(ggExtra)
 
# classic plot :
p <- ggplot(mtcars, aes(x=wt, y=mpg, color=cyl, size=cyl)) +
      geom_point() +
      theme(legend.position="none")
 
# Set relative size of marginal plots (main plot 10x bigger than marginals)
p1 <- ggMarginal(p, type="histogram", size=10)
 
# Custom marginal plots:
p2 <- ggMarginal(p, type="histogram", fill = "slateblue", xparams = list(  bins=10))
 
# Show only marginal plot for x axis
p3 <- ggMarginal(p, margins = 'x', color="purple", size=4)

cowplot::plot_grid(p, p1, p2, p3, ncol = 2, align = "hv", 
                   labels = LETTERS[1:4])

柱狀圖 histogram

# library
library(ggplot2)
library(dplyr)
library(hrbrthemes)

# Build dataset with different distributions
data <- data.frame(
  type = c( rep("variable 1", 1000), rep("variable 2", 1000) ),
  value = c( rnorm(1000), rnorm(1000, mean=4) )
)

# Represent it
p <- data %>%
  ggplot( aes(x=value, fill=type)) +
    geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_fill_manual(values=c("#69b3a2", "#404080")) +
    theme_ipsum() +
    labs(fill="")
p

# Libraries
library(ggplot2)
library(hrbrthemes)

# Dummy data
data <- data.frame(
  var1 = rnorm(1000),
  var2 = rnorm(1000, mean=2)
)

# Chart
p <- ggplot(data, aes(x=x) ) +
  # Top
  geom_density( aes(x = var1, y = ..density..), fill="#69b3a2" ) +
  geom_label( aes(x=4.5, y=0.25, label="variable1"), color="#69b3a2") +
  # Bottom
  geom_density( aes(x = var2, y = -..density..), fill= "#404080") +
  geom_label( aes(x=4.5, y=-0.25, label="variable2"), color="#404080") +
  theme_ipsum() +
  xlab("value of x")

p1 <- ggplot(data, aes(x=x) ) +
  geom_histogram( aes(x = var1, y = ..density..), fill="#69b3a2" ) +
  geom_label( aes(x=4.5, y=0.25, label="variable1"), color="#69b3a2") +
  geom_histogram( aes(x = var2, y = -..density..), fill= "#404080") +
  geom_label( aes(x=4.5, y=-0.25, label="variable2"), color="#404080") +
  theme_ipsum() +
  xlab("value of x")
cowplot::plot_grid(p, p1, ncol = 2, align = "hv", 
                   labels = LETTERS[1:2])

箱線圖 boxplot

# Library
library(ggplot2)
library(dplyr)
library(forcats)

# Dataset 1: one value per group
data <- data.frame(
  name=c("north","south","south-east","north-west","south-west","north-east","west","east"),
  val=sample(seq(1,10), 8 )
)


# Reorder following the value of another column:
p1 <- data %>%
  mutate(name = fct_reorder(name, val)) %>%
  ggplot( aes(x=name, y=val)) +
    geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
    coord_flip() +
    xlab("") +
    theme_bw()
 
# Reverse side
p2 <- data %>%
  mutate(name = fct_reorder(name, desc(val))) %>%
  ggplot( aes(x=name, y=val)) +
    geom_bar(stat="identity", fill="#f68060", alpha=.6, width=.4) +
    coord_flip() +
    xlab("") +
    theme_bw()

# Using median
p3 <- mpg %>%
  mutate(class = fct_reorder(class, hwy, .fun='median')) %>%
  ggplot( aes(x=reorder(class, hwy), y=hwy, fill=class)) + 
    geom_boxplot() +
    geom_jitter(color="black", size=0.4, alpha=0.9) +
    xlab("class") +
    theme(legend.position="none") +
    xlab("")
 
# Using number of observation per group
p4 <- mpg %>%
  mutate(class = fct_reorder(class, hwy, .fun='length' )) %>%
  ggplot( aes(x=class, y=hwy, fill=class)) + 
  stat_summary(fun.y=mean, geom="point", shape=20, size=6, color="red", fill="red") +
    geom_boxplot() +
    xlab("class") +
    theme(legend.position="none") +
    xlab("") +
    xlab("")

p5 <- data %>%
  arrange(val) %>%    # First sort by val. This sort the dataframe but NOT the factor levels
  mutate(name=factor(name, levels=name)) %>%   # This trick update the factor levels
  ggplot( aes(x=name, y=val)) +
    geom_segment( aes(xend=name, yend=0)) +
    geom_point( size=4, color="orange") +
    coord_flip() +
    theme_bw() +
    xlab("")
 
p6 <- data %>%
  arrange(val) %>%
  mutate(name = factor(name, levels=c("north", "north-east", "east", "south-east", "south", "south-west", "west", "north-west"))) %>%
  ggplot( aes(x=name, y=val)) +
    geom_segment( aes(xend=name, yend=0)) +
    geom_point( size=4, color="orange") +
    theme_bw() +
    xlab("")

cowplot::plot_grid(p1, p2, p3, p4, p5, p6, 
                   ncol = 2, align = "hv", 
                   labels = LETTERS[1:6])

library(dplyr)
# Dummy data
names <- c(rep("A", 20) , rep("B", 8) , rep("C", 30), rep("D", 80))
value <- c( sample(2:5, 20 , replace=T) , sample(4:10, 8 , replace=T), 
       sample(1:7, 30 , replace=T), sample(3:8, 80 , replace=T) )
data <- data.frame(names, value) %>%
  mutate(names=factor(names))
 
# Draw the boxplot. Note result is also stored in a object called boundaries
boundaries <- boxplot(data$value ~ data$names , col="#69b3a2" , ylim=c(1,11))
# Now you can type boundaries$stats to get the boundaries of the boxes

# Add sample size on top
nbGroup <- nlevels(data$names)
text( 
  x=c(1:nbGroup), 
  y=boundaries$stats[nrow(boundaries$stats),] + 0.5, 
  paste("n = ",table(data$names),sep="")  
)

山脊圖 ridgeline

# library
library(ggridges)
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)

# Load dataset from github
data <- read.table("dataset/viz/probly.csv", header=TRUE, sep=",")
data <- data %>% 
  gather(key="text", value="value") %>%
  mutate(text = gsub("\\.", " ",text)) %>%
  mutate(value = round(as.numeric(value),0)) %>%
  filter(text %in% c("Almost Certainly","Very Good Chance","We Believe","Likely","About Even", "Little Chance", "Chances Are Slight", "Almost No Chance"))

# Plot
p1 <- data %>%
  mutate(text = fct_reorder(text, value)) %>%
  ggplot( aes(y=text, x=value,  fill=text)) +
    geom_density_ridges(alpha=0.6, stat="binline", bins=20) +
    theme_ridges() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    ) +
    xlab("") +
    ylab("Assigned Probability (%)")

p2 <- data %>%
  mutate(text = fct_reorder(text, value)) %>%
  ggplot( aes(y=text, x=value,  fill=text)) +
    geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
    theme_ridges() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    ) +
    xlab("") +
    ylab("Assigned Probability (%)")

cowplot::plot_grid(p1, p2, 
                   ncol = 2, align = "hv", 
                   labels = LETTERS[1:2])

參考

The R Graph Gallery

最后編輯于：2024.06.12 13:36:47

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者

禁止轉(zhuǎn)載蛉幸，如需轉(zhuǎn)載請通過簡信或評論聯(lián)系作者。

人面猴
序言：七十年代末丛晦，一起剝皮案震驚了整個(gè)濱河市奕纫，隨后出現(xiàn)的幾起案子，更是在濱河造成了極大的恐慌烫沙，老刑警劉巖匹层，帶你破解...
沈念sama閱讀 218,284評論 6贊 506
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件，死亡現(xiàn)場離奇詭異锌蓄，居然都是意外死亡升筏，警方通過查閱死者的電腦和手機(jī)，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 93,115評論 3贊 395
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進(jìn)店門瘸爽，熙熙樓的掌柜王于貴愁眉苦臉地迎上來您访，“玉大人，你說我怎么就攤上這事剪决×橥簦” “怎么了？”我有些...
開封第一講書人閱讀 164,614評論 0贊 354
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵柑潦，是天一觀的道長享言。經(jīng)常有香客問我，道長渗鬼，這世上最難降的妖魔是什么担锤？我笑而不...
開封第一講書人閱讀 58,671評論 1贊 293
?港島之戀（遺憾婚禮）
正文為了忘掉前任，我火速辦了婚禮乍钻，結(jié)果婚禮上肛循，老公的妹妹穿的比我還像新娘。我一直安慰自己银择，他們只是感情好多糠，可當(dāng)我...
茶點(diǎn)故事閱讀 67,699評論 6贊 392
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布。她就那樣靜靜地躺著浩考，像睡著了一般夹孔。火紅的嫁衣襯著肌膚如雪。梳的紋絲不亂的頭發(fā)上析孽，一...
開封第一講書人閱讀 51,562評論 1贊 305
城市分裂傳說
那天搭伤，我揣著相機(jī)與錄音，去河邊找鬼袜瞬。笑死怜俐，一個(gè)胖子當(dāng)著我的面吹牛，可吹牛的內(nèi)容都是我干的邓尤。我是一名探鬼主播拍鲤，決...
沈念sama閱讀 40,309評論 3贊 418
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼，長吁一口氣：“原來是場噩夢啊……” “哼汞扎！你這毒婦竟也來了季稳？” 一聲冷哼從身側(cè)響起，我...
開封第一講書人閱讀 39,223評論 0贊 276
萬榮殺人案實(shí)錄
序言：老撾萬榮一對情侶失蹤澈魄，失蹤者是張志新（化名）和其女友劉穎景鼠，沒想到半個(gè)月后，有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體痹扇，經(jīng)...
沈念sama閱讀 45,668評論 1贊 314
?護(hù)林員之死
正文獨(dú)居荒郊野嶺守林人離奇死亡铛漓，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點(diǎn)故事閱讀 37,859評論 3贊 336
?白月光啟示錄
正文我和宋清朗相戀三年，在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了帘营。大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片票渠。...
茶點(diǎn)故事閱讀 39,981評論 1贊 348
活死人
序言：一個(gè)原本活蹦亂跳的男人離奇死亡，死狀恐怖芬迄，靈堂內(nèi)的尸體忽然破棺而出问顷，到底是詐尸還是另有隱情，我是刑警寧澤禀梳，帶...
沈念sama閱讀 35,705評論 5贊 347
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布杜窄，位于F島的核電站，受9級特大地震影響算途，放射性物質(zhì)發(fā)生泄漏塞耕。R本人自食惡果不足惜，卻給世界環(huán)境...
茶點(diǎn)故事閱讀 41,310評論 3贊 330
男人毒藥：我在死后第九天來索命
文/蒙蒙一嘴瓤、第九天我趴在偏房一處隱蔽的房頂上張望扫外。院中可真熱鬧莉钙，春花似錦、人聲如沸筛谚。這莊子的主人今日做“春日...
開封第一講書人閱讀 31,904評論 0贊 22
一樁弒父案，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽驾讲。三九已至蚊伞，卻和暖如春，著一層夾襖步出監(jiān)牢的瞬間吮铭，已是汗流浹背时迫。一陣腳步聲響...
開封第一講書人閱讀 33,023評論 1贊 270
情欲美人皮
我被黑心中介騙來泰國打工，沒想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留谓晌，地道東北人掠拳。一個(gè)月前我還...
沈念sama閱讀 48,146評論 3贊 370
代替公主和親
正文我出身青樓，卻偏偏與公主長得像扎谎，于是被迫代替她去往敵國和親碳想。傳聞我的和親對象是個(gè)殘疾皇子，可洞房花燭夜當(dāng)晚...
茶點(diǎn)故事閱讀 44,933評論 2贊 355