條形圖
Part 1
- 簡單條形圖
- 簇狀條形圖
- 頻數條形圖
- 條形圖著色
- 正負條形圖分別著色
- 調整寬度和條形間距
Part 2
- 繪制堆積條形圖
- 繪制百分比堆積條形圖
- 添加數據標簽
- 繪制Cleveland點圖
繪制堆積條形圖
library(gcookbook)
library(ggplot2)
# Basic stacked bar chart: one bar per Date, segments filled by Cultivar.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# A common annoyance: the stacking order and the legend order can be the
# opposite of each other. guides() flips the legend so that the two agree.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))

# To flip the stacking order itself, reverse the stack position.
# The former `order = desc(Cultivar)` aesthetic was deprecated in
# ggplot2 2.0.0 and no longer controls stacking, so it is replaced by
# position_stack(reverse = TRUE).
library(plyr) # kept from the original; ddply()/arrange() below rely on plyr
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE))

# Use a different palette and a blue outline around every segment.
# The ColorBrewer palette is named "Pastel1" (digit one), not "Pastell".
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "blue") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
繪制百分比堆積條形圖
# Percent-stacked bars: first rescale each Date group so that its weights
# sum to 100 (plyr's ddply() combined with transform()), then draw an
# ordinary stacked bar chart from the rescaled column.
library(gcookbook) # provides the cabbage_exp example data
library(plyr)
cabbage_exp
# Cultivar Date Weight sd n se
# 1 c39 d16 3.18 0.9566144 10 0.30250803
# 2 c39 d20 2.80 0.2788867 10 0.08819171
# 3 c39 d21 2.74 0.9834181 10 0.31098410
# 4 c52 d16 2.26 0.4452215 10 0.14079141
# 5 c52 d20 3.11 0.7908505 10 0.25008887
# 6 c52 d21 1.47 0.2110819 10 0.06674995

# Split the data by Date and apply transform() to every piece.
ce <- ddply(
  cabbage_exp, "Date", transform,
  percent_weight = Weight / sum(Weight) * 100
)
ce
# Cultivar Date Weight sd n se percent_weight
# 1 c39 d16 3.18 0.9566144 10 0.30250803 58.45588
# 2 c52 d16 2.26 0.4452215 10 0.14079141 41.54412
# 3 c39 d20 2.80 0.2788867 10 0.08819171 47.37733
# 4 c52 d20 3.11 0.7908505 10 0.25008887 52.62267
# 5 c39 d21 2.74 0.9834181 10 0.31098410 65.08314
# 6 c52 d21 1.47 0.2110819 10 0.06674995 34.91686

ggplot(data = ce, mapping = aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity")
# Once the percentages are computed, the chart is drawn exactly like a
# regular stacked bar chart; legend order, palette, outlines and so on can
# be adjusted in the same way as before.
添加數據標簽
# Adding geom_text() to a plot call puts data labels on a bar chart (it works
# for other chart types as well). Variables must be mapped to x, y and the
# label text; vjust then moves the label above or below the top of each bar.
library(gcookbook)

# Labels just below the top of each bar (inside the bar, hence white text).
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")

# Labels just above the top of each bar.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2)

# Labels placed above the bars can spill out of the plotting area.
# Two remedies: (1) widen the y-axis range, or (2) shift the labels' y
# coordinate. The drawback of (2) is that the size of the vertical shift
# depends on the data range of the y axis, whereas with vjust the distance
# between the label and the bar top adapts to the bar height automatically.

# Remedy 1: raise the upper y limit a little.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)

# Remedy 2: shift the label's y coordinate. The label text is passed here as
# a fixed parameter outside aes(), so the column has to be fully qualified:
# a bare `Weight` is only resolved against the data inside aes() and would
# otherwise fail with "object 'Weight' not found".
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = Weight + 0.1), label = cabbage_exp$Weight)

# Same remedy with the label mapped inside aes(): the label sits slightly
# above the bar top and the y range of the plot adjusts automatically.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = Weight + 0.1, label = Weight))
# For a clustered (dodged) bar chart the labels need position_dodge() with an
# explicit width so that they line up with their bars; the dodge width
# defaults to 0.9. `size` sets the label font size (3 here).
# NOTE(review): the original note gives the default size as 5; current
# ggplot2 documents roughly 3.88 for geom_text - confirm.
# The stray `y = Weight` argument that was passed to ggplot() outside aes()
# has been dropped: y is already mapped inside aes().
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5, colour = "white",
    position = position_dodge(.9), size = 3
  )
# Data labels on a stacked bar chart.
# Before labels can be added, a cumulative sum has to be computed within
# each bar. The rows must be sorted sensibly first, otherwise the running
# totals come out wrong. plyr's arrange() does the sorting; plyr has to be
# loaded explicitly (ggplot2 does not attach it).
library(plyr)
ce <- arrange(cabbage_exp, Date, Cultivar)
#
# Cultivar Date Weight sd n se
# 1 c39 d16 3.18 0.9566144 10 0.30250803
# 2 c52 d16 2.26 0.4452215 10 0.14079141
# 3 c39 d20 2.80 0.2788867 10 0.08819171
# 4 c52 d20 3.11 0.7908505 10 0.25008887
# 5 c39 d21 2.74 0.9834181 10 0.31098410
# 6 c52 d21 1.47 0.2110819 10 0.06674995
#
# With the rows in a sensible order, ddply() groups the data by Date and
# computes the cumulative sum of Weight inside every group.
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ce
# Cultivar Date Weight sd n se label_y
# 1 c39 d16 3.18 0.9566144 10 0.30250803 3.18
# 2 c52 d16 2.26 0.4452215 10 0.14079141 5.44
# 3 c39 d20 2.80 0.2788867 10 0.08819171 2.80
# 4 c52 d20 3.11 0.7908505 10 0.25008887 5.91
# 5 c39 d21 2.74 0.9834181 10 0.31098410 2.74
# 6 c52 d21 1.47 0.2110819 10 0.06674995 4.21

# label_y is a running total in row order (c39 first, then c52), so the
# segments must be stacked in that same order from the bottom up. ggplot2's
# default stacking (since 2.2.0) puts the first group on top, which would
# attach each label to the wrong segment; reverse = TRUE restores the
# match, as in the later plots of this section.
# vjust = 1.5 places each label just below the top of its segment.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "White")
# To centre each label inside its segment, place it half a segment below
# the running total: cumsum(Weight) - 0.5 * Weight.
ce <- ddply(
  arrange(cabbage_exp, Date, Cultivar),
  "Date",
  transform,
  label_y = cumsum(Weight) - 0.5 * Weight
)
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(
    stat = "identity",
    position = position_stack(reverse = TRUE)
  ) +
  geom_text(
    mapping = aes(y = label_y, label = Weight),
    colour = "White"
  )
# Polish the look: black segment outlines, labels with two decimals and a
# "kg" unit, reversed legend and a pastel palette.
# Fixes: `nsamll` was a misspelling of format()'s `nsmall` argument (the
# unknown argument was silently ignored), and the ColorBrewer palette is
# called "Pastel1" (digit one), not "Pastell".
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(
    stat = "identity", position = position_stack(reverse = TRUE),
    colour = "black"
  ) +
  geom_text(
    aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
繪制Cleveland點圖
# A Cleveland dot plot can stand in for a bar chart: it reduces visual
# clutter and makes the figure easier to read.
library(gcookbook)

# Keep the first 25 rows of the tophitters2001 data set.
tophit <- tophitters2001[1:25, ]
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

# tophitters2001 has many columns; look at three of them.
tophit[, c("name", "lg", "avg")]
# name lg avg
# 1 Larry Walker NL 0.3501
# 2 Ichiro Suzuki AL 0.3497
# 3 Jason Giambi AL 0.3423
# 4 Roberto Alomar AL 0.3357
# 5 Todd Helton NL 0.3356
# 6 Moises Alou NL 0.3314
# 7 Lance Berkman NL 0.3310
# 8 Bret Boone AL 0.3307
# 9 Frank Catalanotto AL 0.3305
# 10 Chipper Jones NL 0.3304

# Order the names by avg, enlarge the points, and keep only dashed
# horizontal grid lines.
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

# The axes can also be swapped: names on the x axis, values on the y axis.
# The name labels are rotated by 60 degrees.
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
# Grouping the items by another variable is often useful. Here the factor
# lg has two levels, NL and AL (National League and American League), and
# the names are to be sorted by lg first and then by avg. reorder() can only
# order factor levels by a single variable, so the ordering is done by hand.

# Take the name column sorted by lg and then avg, and use that sequence as
# the factor levels of name.
nameorder <- with(tophit, name[order(lg, avg)])
tophit$name <- factor(tophit$name, levels = nameorder)

# Map lg to the point colour, and use geom_segment() to draw lines that end
# at each data point instead of grid lines running across the whole panel.
# NOTE(review): a numeric legend.position is deprecated in recent ggplot2
# releases in favour of legend.position.inside - confirm against the
# installed version before changing it.
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_segment(mapping = aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(mapping = aes(colour = lg), size = 3) +
  scale_color_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = c(1, 0.55),
    legend.justification = c(1, 0.5)
  )
# Facets: one panel per league instead of a colour legend.
# The legend is suppressed with guide = "none"; the logical form
# guide = FALSE is deprecated in current ggplot2.
# Free y scales and free panel heights let each panel list only its own
# players.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_color_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")
end