1. 使用 VennDiagram 包繪制韋恩圖
1.1 兩個數據集
# 安裝并加載所需的R包
# install.packages("VennDiagram")
library(VennDiagram)
# Create the test data: five random subsets of 1:1000.
# replace = FALSE (the default) means sampling without replacement, so each
# set contains no duplicated values.
set1 <- sample(1:1000, 300, replace = FALSE)
set2 <- sample(1:1000, 130, replace = FALSE)
set3 <- sample(1:1000, 300, replace = FALSE)
set4 <- sample(1:1000, 200, replace = FALSE)
set5 <- sample(1:1000, 300, replace = FALSE)
# Two-set Venn diagram drawn with VennDiagram::venn.diagram().
s1 <- list(
  set1 = set1,
  set2 = set2
)
v1 <- venn.diagram(
  x = s1,
  filename = NULL, # giving a file name instead saves the figure to disk
  # Output options for the saved figure:
  # imagetype = "png", # image type (tiff, png, svg)
  # height = 1000,     # image height
  # width = 1000,      # image width
  # resolution = 300,  # image resolution
  scaled = TRUE,       # scale the circles according to set size
  alpha = c(0.8, 0.8), # transparency of each region
  ## Everything below styles the elements other than the title.
  # Circles
  lwd = 1, # circle line width: 1 2 3 4 5 6
  lty = 1, # circle line type: 1 = solid, 2 = dashed, "blank" = no line
  col = c("black", "red"),            # circle line colours
  fill = c("#0073C2FF", "#EFC000FF"), # circle fill colours
  # Numbers
  cex = 1,             # number size
  fontface = "bold",   # bold numbers
  fontfamily = "sans", # number font (was misspelled `fonrfamily`, so it had no effect)
  # Category labels
  cat.cex = 1,                # label font size
  cat.col = "black",          # label colour
  cat.fontface = "bold",      # bold labels
  cat.default.pos = "outer",  # default label placement; accepts "outer" or "text"
  cat.pos = c(0, 0),          # label position around the circle, in degrees
  cat.dist = c(0.05, 0.03),   # label distance from the circle; adjust if labels overlap the circles
  cat.fontfamily = "sans"     # label font (no trailing comma: an empty last argument is not a valid call)
)
cowplot::plot_grid(v1)
1.2 多個數據集(此處以5個為示例)
# Five-set Venn diagram drawn with VennDiagram::venn.diagram().
s2 <- list(
  set1 = set1,
  set2 = set2,
  set3 = set3,
  set4 = set4,
  set5 = set5
)
v2 <- venn.diagram(
  x = s2,
  filename = NULL,
  col = "transparent",
  fill = c("dodgerblue", "goldenrod1", "darkorange1", "seagreen3", "orchid3"),
  # 31 region labels: the first five reuse the fill colours, the next 25 are
  # white and the last one is black (same values as the spelled-out vector).
  label.col = c(
    "dodgerblue", "goldenrod1", "darkorange1", "seagreen3", "orchid3",
    rep("white", 25),
    "black"
  ),
  fontface = "bold",
  # One label colour per set; the accidental nested c(cat.col = c(...)) is removed.
  cat.col = c("darkblue", "darkgreen", "orange", "grey50", "purple"),
  cat.dist = c(0.2, 0.2, 0.18, 0.18, 0.2),
  alpha = 0.50,
  cex = 1,
  cat.cex = 1,
  margin = 0.05
)
cowplot::plot_grid(v2)
1.3 交集元素的提取
# VennDiagram's get.venn.partitions() extracts the elements of every
# intersection. Here it is applied to the five-set list from above.
inter <- VennDiagram::get.venn.partitions(x = s2)
head(inter)
## set1 set2 set3 set4 set5 ..set.. ..values.. ..count..
## 1 TRUE TRUE TRUE TRUE TRUE set1∩set2∩set3∩set4∩set5 822, 588 2
## 2 FALSE TRUE TRUE TRUE TRUE (set2∩set3∩set4∩set5)∖(set1) 406 1
## 3 TRUE FALSE TRUE TRUE TRUE (set1∩set3∩set4∩set5)∖(set2) 442, 104 2
## 4 FALSE FALSE TRUE TRUE TRUE (set3∩set4∩set5)∖(set1∪set2) 366, 715, 379, 414, 30, 308, 398, 322, 359, 825, 708, 458 12
## 5 TRUE TRUE FALSE TRUE TRUE (set1∩set2∩set4∩set5)∖(set3) 615, 541 2
## 6 FALSE TRUE FALSE TRUE TRUE (set2∩set4∩set5)∖(set1∪set3) 934, 84, 75, 655 4
★ 5個數據集是VennDiagram包的上限
2. 使用 ggVennDiagram 包繪制韋恩圖
# 安裝并加載所需的R包
# install.packages("ggVennDiagram")
library(ggplot2)
library(ggVennDiagram)
# ggVennDiagram ships several shapes to choose from. By default only the
# best-fitting shape is used, but a shape can also be picked explicitly.
ggVennDiagram::plot_shapes()
2.1 三個數據集
# Three-set input list
x1 <- list(set1 = set1, set2 = set2, set3 = set3)

# method1: the one-call interface
ggVennDiagram(
  x1,
  category.names = c("A", "B", "C"), # names shown for the sets
  label = "both",        # one of "both", "count", "percent", "none"
  label_alpha = 0,       # removes the background behind the text labels
  label_color = "black",
  edge_size = 1,
  edge_lty = "dashed"    # dashed circle outlines
) +
  scale_fill_gradient(name = "gene count", low = "white", high = "#b9292b")
# method2: build the plot layer by layer
# Construct the Venn object and compute the plot data for shape "301".
venn <- Venn(x1)
# `shape_id` must be passed as a named argument; the former
# `shape_id == "301"` compared an undefined object instead of selecting a shape.
data <- process_data(venn, shape_id = "301")
ggplot() +
  # filled regions, coloured by the number of elements in each region
  geom_sf(
    aes(fill = count),
    data = venn_region(data)
  ) +
  # set outlines
  geom_sf(
    color = "grey",
    size = 1,
    data = venn_setedge(data),
    show.legend = FALSE
  ) +
  scale_fill_gradient(low = "white", high = "#b9292b", name = "gene count") +
  # set names
  geom_sf_text(
    aes(label = name),
    data = venn_setlabel(data),
    size = 8
  ) +
  # region counts
  geom_sf_label(
    aes(label = count),
    data = venn_region(data),
    size = 4
  ) +
  theme_void()
2.2 多個數據集(此處以5個為示例)
# Five-set input list. `x2` was used below without ever being defined, so it
# is built here from the five test sets.
x2 <- list(
  set1 = set1,
  set2 = set2,
  set3 = set3,
  set4 = set4,
  set5 = set5
)
# Keep the fill nearly blank; colours can be adjusted afterwards in Illustrator.
library(ggsci)
ggVennDiagram(
  x2,
  label_alpha = 0,
  label = "none",
  edge_size = 0.5
  # show_intersect = TRUE # interactive (plotly) view of the genes in each subset
) +
  scale_color_lancet() + # Lancet journal palette from the ggsci package
  scale_fill_gradient(low = "gray100", high = "gray95", guide = "none")
# Custom colour
color1 <- alpha("#f8766d", 0.9)
ggVennDiagram(
  x2,
  label_alpha = 0,
  label_size = 3
  # edge_size = 0.5, label = "count", # hides the percentage; default is "both"
  # show_intersect = TRUE # interactive (plotly) view of the genes in each subset
) +
  scale_color_brewer(palette = "Paired") +
  scale_fill_gradient(
    low = "white", high = color1,
    guide = "none" # drop the legend
  )
★ 支持1-7維的韋恩圖繪制
★ 是ggplot2的拓展包,因此支持ggplot2的其他語法設置
★ show_intersect = TRUE時,可輸出為交互式html,此時可點擊數值顯示源數據
3. 使用 UpSetR 包繪制upset圖
UpSetR包經常用于大于5個樣本的“韋恩圖”
# Install and load the required R packages
# install.packages("UpSetR")
# install.packages("RColorBrewer")
# The example data set comes from ggplot2movies. Install it only when it is
# missing, so that re-running the script does not reinstall it every time.
if (!requireNamespace("ggplot2movies", quietly = TRUE)) {
  install.packages("ggplot2movies")
}
library(UpSetR)
library(RColorBrewer)
library(ggplot2)
# Movie data from IMDB
movies <- as.data.frame(ggplot2movies::movies)
head(movies)
## title year length budget rating votes r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 mpaa Action Animation Comedy Drama Documentary Romance Short
## 1 $ 1971 121 NA 6.4 348 4.5 4.5 4.5 4.5 14.5 24.5 24.5 14.5 4.5 4.5 0 0 1 1 0 0 0
## 2 $1000 a Touchdown 1939 71 NA 6.0 20 0.0 14.5 4.5 24.5 14.5 14.5 14.5 4.5 4.5 14.5 0 0 1 0 0 0 0
## 3 $21 a Day Once a Month 1941 7 NA 8.2 5 0.0 0.0 0.0 0.0 0.0 24.5 0.0 44.5 24.5 24.5 0 1 0 0 0 0 1
## 4 $40,000 1996 70 NA 8.2 6 14.5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 34.5 45.5 0 0 1 0 0 0 0
## 5 $50,000 Climax Show, The 1975 71 NA 3.4 17 24.5 4.5 0.0 14.5 14.5 4.5 0.0 0.0 0.0 24.5 0 0 0 0 0 0 0
## 6 $pent 2000 91 NA 4.3 45 4.5 4.5 4.5 14.5 14.5 14.5 4.5 4.5 14.5 14.5 0 0 0 1 0 0 0
# Tuned and styled UpSet plot.
# The call is namespace-qualified because ComplexUpset, which this file also
# loads, exports its own `upset()`; whichever is attached last masks the other.
# NOTE(review): fromList() receives the whole data frame, so every column is
# treated as a set of values — confirm this is what the example intends.
UpSetR::upset(
  fromList(movies),
  nsets = length(movies), # show every set; lower this number to show fewer sets
  nintersects = 15,       # how many intersections to show
  sets = c("title", "length", "budget", "votes", "year"), # keep.order = TRUE, # name the sets, or use keep.order = TRUE to keep the input order
  number.angles = 0, # angle of the numbers above the intersection bars
  point.size = 4,    # size of the matrix points
  line.size = 1,     # width of the connecting lines
  mainbar.y.label = "Intersection size", # y-axis label
  main.bar.color = "black",              # colour of the intersection bars
  matrix.color = "black",                # colour of the matrix points
  sets.x.label = "Set size",             # x-axis label
  sets.bar.color = brewer.pal(5, "Set1"), # set-size bar colours; Set1 has only 9 colours, Set3 and Paired have 12
  mb.ratio = c(0.7, 0.3), # height ratio of the bar plot to the matrix plot
  order.by = "freq",      # ordering of the intersections, e.g. "freq" or "degree"
  # Six values: intersection size title, intersection size tick labels,
  # set size title, set size tick labels, set names, numbers above bars.
  text.scale = c(1.5, 1.5, 1.5, 1.5, 1.5, 1),
  shade.color = "#12507B", # colour of the shaded matrix rows
  # Highlight specific intersections; add one list() per combination of interest.
  queries = list(
    list(query = intersects, params = list("votes"), color = "purple", active = TRUE),
    list(query = intersects, params = list("votes", "length"), color = "orange", active = TRUE)
  )
)
★ 不支持ggplot語法
4. 使用 ComplexUpset 包繪制upset圖
4.1 基本用法
# 安裝并加載所需的R包
# install.packages('ComplexUpset')
# if(!require(devtools)) install.packages("devtools")
# devtools::install_github("krassowski/complex-upset")
library(ggplot2)
library(ComplexUpset)
movies <- as.data.frame(ggplot2movies::movies)
# Columns 18-24 hold the movie genres (as a 0/1 matrix)
genres <- colnames(movies)[18:24]
genres
## [1] "Action" "Animation" "Comedy" "Drama" "Documentary" "Romance" "Short"
# Turn empty strings in the `mpaa` column into NA, then drop rows with missing
# values to keep the demo simple. `%in%` never yields NA, so the assignment
# still works if `mpaa` already contains NA values.
movies$mpaa[movies$mpaa %in% ""] <- NA
movies <- na.omit(movies)
# Basic ComplexUpset plot of the genre columns
ComplexUpset::upset(
  movies,
  genres,
  name = 'genre',       # label shown at the bottom
  n_intersections = 15,
  min_degree = 2,       # minimum degree: show only intersections of at least this many sets
  min_size = 5,         # smallest intersection size to show
  height_ratio = 0.3,   # proportion taken by the lower panel
  width_ratio = 0.2,    # width of the bar chart on the left
  set_sizes = FALSE,
  wrap = TRUE
)
4.2 添加組件(annotations)
# Three ways of attaching several annotation panels
ComplexUpset::upset(
  movies,
  genres,
  width_ratio = 0.1,
  min_size = 10,
  annotations = list(
    # Way 1 - a plain list: adds the `length` column
    'Length' = list(
      aes = aes(x = intersection, y = length),
      geom = geom_boxplot(na.rm = TRUE)
    ),
    # Way 2 - a ggplot2 object: adds the `rating` column
    'Rating' = (
      # aes(x = intersection) is supplied by default and can be left out
      ggplot(mapping = aes(y = rating)) +
        geom_jitter(aes(color = log10(votes)), na.rm = TRUE) +
        geom_violin(alpha = 0.5, na.rm = TRUE)
    ),
    # Way 3 - the built-in upset_annotate() helper
    'Budget' = upset_annotate('budget', geom_boxplot(na.rm = TRUE))
  )
)
# Use a filled bar chart to compare the proportions of a categorical variable
ComplexUpset::upset(
  movies,
  genres,
  width_ratio = 0.1,
  annotations = list(
    'MPAA Rating' = (
      ggplot(mapping = aes(fill = mpaa)) +
        geom_bar(stat = 'count', position = 'fill') +
        scale_y_continuous(labels = scales::percent_format()) +
        scale_fill_manual(
          values = c(
            'R' = '#E41A1C',
            'PG' = '#377EB8',
            'PG-13' = '#4DAF4A',
            'NC-17' = '#FF7F00'
          )
        ) +
        ylab('MPAA Rating')
    )
  )
)
4.3 區域選擇模式
ComplexUpset提供定義相應維恩圖上感興趣區域的模式(以A、B、C三個數據集為例);自定義時,可用intersection_size()進行相應的調整:
1) exclusive_intersection($(A \cap B) \setminus C$):屬於所定義的交集、但不屬於任何其他集合的元素(別名:distinct),默認模式
2) inclusive_intersection($A \cap B$):屬於所定義交集的元素,包括與其他集合重疊的部分(別名:intersect)
3) exclusive_union($(A \cup B) \setminus C$):屬於所定義並集的元素,不包括與任何其他集合重疊的元素
4) inclusive_union($A \cup B$):屬於所定義並集的元素,包括與任何其他集合重疊的元素(別名:union)
# Compare region-selection modes side by side.
# The stray duplicated `upset(` line is removed: it opened a second call that
# was never closed, so the block did not parse.
upset(
  movies, genres,
  mode = 'inclusive_intersection',
  annotations = list(
    # No mode given here, so the mode set on the upset() call above is used
    'Length (inclusive intersection)' = (
      ggplot(mapping = aes(y = length))
      + geom_jitter(alpha = 0.2, na.rm = TRUE)
    ),
    'Length (exclusive intersection)' = (
      ggplot(mapping = aes(y = length))
      + geom_jitter(alpha = 0.2, na.rm = TRUE)
      + upset_mode('exclusive_intersection')
    ),
    'Length (inclusive union)' = (
      ggplot(mapping = aes(y = length))
      + geom_jitter(alpha = 0.2, na.rm = TRUE)
      + upset_mode('inclusive_union')
    )
  ),
  min_size = 10,
  width_ratio = 0.1
)
# Add a colour mapping
library(ggsci)
upset(
  movies, genres,
  min_size = 10, width_ratio = 0.1,
  # Customise the intersection-size panel
  base_annotations = list(
    "intersection size" = intersection_size(
      counts = FALSE, # hide the counts (FALSE rather than the reassignable `F`)
      mapping = aes(fill = "bars_color")
    )
    + scale_fill_manual(values = c("bars_color" = "skyblue"), guide = "none") # one single colour
  )
)
# Colour the intersection-size bars by MPAA rating
upset(
  movies, genres,
  min_size = 10, width_ratio = 0.1,
  # Customise the intersection-size panel
  base_annotations = list(
    "intersection size" = intersection_size(
      counts = FALSE, # hide the counts (FALSE rather than the reassignable `F`)
      mapping = aes(fill = mpaa)
    )
    + scale_fill_lancet() # Lancet palette from the ggsci package
  )
)
5. 使用 VennDetail 包:韋恩圖+韋恩條形圖+韋恩餅圖+upset圖
5.1 不同布局的圖形
# 安裝并加載所需的R包
# if (!requireNamespace("BiocManager"))
# install.packages("BiocManager")
# BiocManager::install("VennDetail")
library(VennDetail)
# Create the test data: five random subsets of 1..1000, each drawn without
# replacement (sample.int() does not replace by default).
A <- sample.int(1000L, size = 400L)
B <- sample.int(1000L, size = 600L)
C <- sample.int(1000L, size = 350L)
D <- sample.int(1000L, size = 550L)
E <- sample.int(1000L, size = 450L)
# Build the VennDetail object from the five sets and show its subsets
venn <- VennDetail::venndetail(
  list(A = A, B = B, C = C, D = D, E = E)
)
detail(venn)
# Venn diagram (the default)
plot(venn)
# Venn pie chart
plot(venn, type = "vennpie")
vennpie(
  venn,
  min = 4 # show only subsets whose elements come from at least four data sets
  # any = 1, revcolor = "lightgrey" # highlight unique or shared subsets
)
# Venn bar plot
dplot(venn, textsize = 4, order = TRUE)
# UpSet plot
plot(venn, type = "upset")
5.2 提取子集及可用注釋
## List the subset names (printed together with their sizes)
detail(object = venn)
## Shared B_C_D_E A_C_D_E C_D_E A_B_D_E B_D_E A_D_E D_E A_B_C_E B_C_E A_C_E C_E A_B_E B_E
## 15 27 14 23 51 59 29 38 17 22 11 14 29 50
## A_E E A_B_C_D B_C_D A_C_D C_D A_B_D B_D A_D D A_B_C B_C A_C C
## 19 32 28 43 7 27 34 61 32 62 30 37 14 21
## A_B B A
## 49 48 21
# First ten rows of the elements belonging to the two chosen subsets
head(getSet(venn, subset = c("Shared", "A_C_D_E")), n = 10)
## Subset Detail
## 1 Shared 522
## 2 Shared 413
## 3 Shared 362
## 4 Shared 415
## 5 Shared 789
## 6 Shared 984
## 7 Shared 712
## 8 Shared 719
## 9 Shared 114
## 10 Shared 666
# First rows of the wide-format result table
head(result(venn, wide = TRUE), n = 6L)
## Detail A B C D E SharedSets
## 10 522 1 1 1 1 1 5
## 52 413 1 1 1 1 1 5
## 116 362 1 1 1 1 1 5
## 136 415 1 1 1 1 1 5
## 177 789 1 1 1 1 1 5
## 185 984 1 1 1 1 1 5
參考: