hello,大家好屡限,不知道下面這張圖大家還有印象么
在我之前分享的文章10X單細(xì)胞(10X空間轉(zhuǎn)錄組)之細(xì)胞通訊軟件之間的分析比較中,大量采用這樣的圖進(jìn)行展示,后來呢归敬,在文章文章中的啞鈴圖復(fù)現(xiàn)(基礎(chǔ)知識)嘗試完全實(shí)現(xiàn),可惜半路停止,今天就給大家展示如何實(shí)現(xiàn)這樣的圖,不帶一點(diǎn)折扣妹懒。
其實(shí)這個圖有個專業(yè)的名詞,叫韋恩圖双吆,但是大家熟悉的韋恩圖應(yīng)該是長下面的樣子:
少量的情況下非常美觀眨唬,但數(shù)據(jù)一多信息展示就會混亂,于是就有了上面的展示方式好乐,好了匾竿,借助R包UpSet,我們來實(shí)現(xiàn)這個圖蔚万。
接下來我們直接通過實(shí)例來看下如何進(jìn)行數(shù)據(jù)的可視化:
install.packages("UpSetR")
##載入包
library(UpSetR)
library(ggplot2)
library(grid)
library(plyr)
##構(gòu)建數(shù)據(jù)
# example oflist input (list of named vectors)
listInput <-list(one = c(1, 2, 3, 5, 7, 8, 11, 12, 13), two = c(1, 2, 4, 5, 10), three =c(1, 5, 6, 7, 8, 9, 10, 12, 13))
# example ofexpression input
expressionInput<- c(one = 2, two = 1, three = 2, `one&two` = 1, `one&three` = 4,`two&three` = 1, `one&two&three` = 2)
##可視化結(jié)果
upset(fromList(listInput),order.by = "freq")
upset(fromExpression(expressionInput),order.by = "freq")
##載入數(shù)據(jù)并繪圖
movies <-read.csv(system.file("extdata", "movies.csv", package ="UpSetR"), header = T, sep = ";")
## nsets(頻數(shù)最多的前六個變量)岭妖,text.scale =c(intersection size title, intersection size ticklabels, set size title, set size tick labels, set names, numbers above bars)
upset(movies,nsets = 6, number.angles = 30, point.size = 3.5, line.size = 2, mainbar.y.label = "Genre Intersections", sets.x.label = "Movies Per Genre", text.scale =c(1.3, 1.3, 1, 1, 2, 0.75))
##自定義交集的組
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq")
##基于相交的等級進(jìn)行排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "degree")
##基于等級和頻率共同排序,通過先后來確定排序順序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = c("degree", "freq"))
##保留各組頻數(shù)反璃,不排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq", keep.order = TRUE)
##對交集結(jié)果進(jìn)行分組昵慌,nintersects交叉點(diǎn)的數(shù)目閾值,cutoff交叉結(jié)果閾值版扩。
upset(movies,nintersects = 70, group.by = "sets", cutoff = 7)
##空的交叉點(diǎn)展示
upset(movies,empty.intersections = "on", order.by = "freq")
##利用不同的顏色顯示重要的信息
upset(movies,queries = list(list(query = intersects, params = list("Drama", "Comedy","Action"), color = "orange", active = T), list(query =intersects, params = list("Drama"), color = "red", active =F), list(query = intersects, params = list("Action","Drama"), active = T)))
##通過設(shè)置閾值進(jìn)行標(biāo)記
upset(movies,queries = list(list(query = elements, params = list("AvgRating", 3.5,4.1), color = "blue", active = T), list(query = elements, params =list("ReleaseDate", 1980, 1990, 2000), color = "red",active = F)))
##通過expression進(jìn)行篩選
upset(movies,queries = list(list(query = intersects, params = list("Action", "Drama"),active = T), list(query = elements, params = list("ReleaseDate", 1980,1990, 2000), color = "red", active = F)), expression ="AvgRating > 3 & Watches > 100")
##自定義的query結(jié)構(gòu)
Myfunc <-function(row, release, rating) {data <- (row["ReleaseDate"] %in%release) & (row["AvgRating"] > rating)}#row數(shù)據(jù)源废离,release侄泽,rating指的是parms中的第一礁芦,二個參數(shù)
upset(movies,queries = list(list(query = Myfunc, params = list(c(1970, 1980, 1990, 1999,2000), 2.5), color = "blue", active = T)))
##增加query的標(biāo)簽legend
upset(movies,query.legend = "top", queries = list(list(query = intersects,
params = list("Drama","Comedy", "Action"), color = "orange", active =T,
query.name = "Funny action"),list(query = intersects, params = list("Drama"),
color = "red", active = F),list(query = intersects, params = list("Action",
"Drama"), active = T, query.name = "Emotionalaction")))
##綜合前面的方式的完整例子
upset(movies, query.legend = "bottom", queries =list(list(query = Myfunc, params = list(c(1970,
1980, 1990, 1999, 2000),2.5), color = "orange", active = T), list(query = intersects,
params = list("Action","Drama"), active = F), list(query = elements, params =list("ReleaseDate",
1980, 1990, 2000), color ="red", active = F, query.name = "Decades")),
expression ="AvgRating > 3 & Watches > 100")
##通過柱狀圖增加變量的其它數(shù)據(jù)信息其中type=bar plot("hist") or heat map ("heat"/“bool”)
sets <- names(movies[3:19])
avgRottenTomatoesScore <- round(runif(17, min = 0, max = 90))
metadata <- as.data.frame(cbind(sets, avgRottenTomatoesScore))
names(metadata) <- c("sets","avgRottenTomatoesScore")
metadata$avgRottenTomatoesScore <-as.numeric(as.character(metadata$avgRottenTomatoesScore))
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
column ="avgRottenTomatoesScore", assign = 20))))
##增加熱圖信息
Cities <- sample(c("Boston", "NYC","LA"), 17, replace = T)
metadata <- cbind(metadata, Cities)
metadata$Cities <- as.character(metadata$Cities)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller",
"Romance")), ]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "heat",
column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
LA ="purple")))))
##增加文字信息
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "text",
column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
LA ="purple")))))
##直接設(shè)置連線區(qū)域背景
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
column ="avgRottenTomatoesScore", assign = 20), list(type ="matrix_rows",
column ="Cities", colors = c(Boston = "green", NYC ="navy", LA = "purple"),
alpha = 0.5))))
##一次添加多種信息
accepted <- round(runif(17, min = 0, max = 1))
metadata <- cbind(metadata, accepted)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller", "Romance")),]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist", column = "avgRottenTomatoesScore",assign = 20), list(type = "bool", column = "accepted", assign= 5, colors = c("#FF3333", "#006400")), list(type ="text", column = "Cities", assign = 5, colors = c(Boston ="green", NYC = "navy", LA = "purple")))))
##混合圖的繪制,通過attribute.plots添加
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
column ="avgRottenTomatoesScore", assign = 20), list(type = "bool",column = "accepted",
assign = 5, colors =c("#FF3333", "#006400")), list(type = "text",column = "Cities",
assign = 5, colors =c(Boston = "green", NYC = "navy", LA ="purple")),
list(type ="matrix_rows", column = "Cities", colors = c(Boston ="green",
NYC ="navy", LA = "purple"), alpha = 0.5))), queries =list(list(query = intersects,
params =list("Drama"), color = "red", active = F), list(query =intersects,
params =list("Action", "Drama"), active = T), list(query =intersects,
params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
attribute.plots =list(gridrows = 45, plots = list(list(plot = scatter_plot,
x ="ReleaseDate", y = "AvgRating", queries = T), list(plot =scatter_plot,
x ="AvgRating", y = "Watches", queries = F)), ncols = 2),query.legend = "bottom")
##自定義繪圖函數(shù)的混合繪圖
myplot <- function(mydata, x, y) {
plot <- (ggplot(data =mydata, aes_string(x = x, y = y, colour = "color")) +
geom_point() + scale_color_identity()+ theme(plot.margin = unit(c(0,
0, 0, 0),"cm")))
}
another.plot <- function(data, x, y) {
data$decades <-round_any(as.integer(unlist(data[y])), 10, ceiling)
data <-data[which(data$decades >= 1970), ]
myplot <- (ggplot(data,aes_string(x = x)) + geom_density(aes(fill = factor(decades)),
alpha = 0.4) +theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.key.size =unit(0.4,
"cm")))
}
upset(movies, main.bar.color = "black", queries =list(list(query = intersects,
params =list("Drama"), color = "red", active = F), list(query =intersects,
params =list("Action", "Drama"), active = T), list(query =intersects,
params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
attribute.plots =list(gridrows = 45, plots = list(list(plot = myplot, x ="ReleaseDate",
y ="AvgRating", queries = T), list(plot = another.plot, x ="AvgRating",
y ="ReleaseDate", queries = F)), ncols = 2))
##增加箱線圖
upset(movies, boxplot.summary = c("AvgRating","ReleaseDate"))
好了悼尾,大家多多學(xué)習(xí)
生活很好柿扣,有你更好