論文
A saturated map of common genetic variants associated with human height
https://www.nature.com/articles/s41586-022-05275-y
s41586-022-05275-y.pdf
代碼沒有公開,但是作圖數據基本都公開了,爭取把每個圖都重復一遍
今天的推文重復論文中的Figure1
代碼
# Switch to the data folder; the relative file names used in this script
# are resolved against it.
setwd("data/20221014")
library(readxl)

# Read the plotting data for Figure 1 and list the column names it provides.
fig1 <- read_excel(path = "Figure1.xlsx")
names(fig1)
library(tidyverse)
library(stringr)

# Worked example of the label parsing used for the P-value categories:
# remove "(", "[", "]" and the "5e-" prefix, split on the comma, and turn
# each half into a number. "[0,5e-100]" becomes "0,100".

# First half of the label (prints 0).
as.numeric(
  str_split_fixed(
    str_replace_all("[0,5e-100]", "\\(|5e-|\\]|\\[", ""),
    ",",
    n = 2
  )[, 1]
)

# Second half of the label (prints 100).
as.numeric(
  str_split_fixed(
    str_replace_all("[0,5e-100]", "\\(|5e-|\\]|\\[", ""),
    ",",
    n = 2
  )[, 2]
)
# Derive a plotting group for every row from its `P-value Caregory` label.
# The label is reduced to two numbers: `max_value` is the part before the
# comma and `min_value` the part after it (for the "5e-XX" bounds these are
# the exponents XX). Rows whose pair matches none of the cases get NA.
new.fig1 <- local({
  # Two-column character matrix: text before / after the comma.
  category_bounds <- fig1 %>%
    pull(`P-value Caregory`) %>%
    str_replace_all("\\(|5e-|\\]|\\[", "") %>%
    str_split_fixed(",", n = 2)

  fig1 %>%
    mutate(
      max_value = as.numeric(category_bounds[, 1]),
      min_value = as.numeric(category_bounds[, 2]),
      group = case_when(
        min_value == 100 & max_value == 0 ~ "group01",
        min_value == 50 & max_value == 100 ~ "group02",
        min_value == 20 & max_value == 50 ~ "group03",
        min_value == 10 & max_value == 20 ~ "group04",
        min_value == 8 & max_value == 10 ~ "group05"
      )
    )
})

# Number of rows per group.
table(new.fig1$group)
library(ggplot2)
library(ggh4x)
library(cowplot)

# Figure 1, vector version: joint effect of the minor allele against minor
# allele frequency, one point per row of `new.fig1`, coloured by P-value group.
#
# The plot is stored as `p1` because the PDF export later in this script
# prints `p1`; without the assignment that step fails with
# "object 'p1' not found".
p1 <- ggplot(
  data = new.fig1,
  aes(
    x = `Minor Allele Frequency`,
    y = `Join Effect of Minor Allele`,
    color = group
  )
) +
  # Points, with the legend keys drawn by ggh4x's rectangle key glyph.
  geom_point(
    key_glyph = rectangle_key_glyph(
      color = color,
      fill = color,
      padding = margin(3, 3, 3, 3)
    )
  ) +
  # Fixed colour and legend text per group; the exponents in the labels are
  # plain text here and are meant to be turned into superscripts afterwards.
  scale_color_manual(
    values = c(
      "group01" = "#ee82ee",
      "group02" = "#2e8b57",
      "group03" = "#1e90ff",
      "group04" = "#daa520",
      "group05" = "#cdc673"
    ),
    name = "",
    labels = c(
      "group01" = "P < 5 × 10–100 (672 SNPs)",
      "group02" = "5 × 10–50 > P > 5 × 10–100 (1,110 SNPs)",
      "group03" = "5 × 10–20 > P > 5 × 10–50 (3,513 SNPs)",
      "group04" = "5 × 10–10 > P > 5 × 10–20 (5,192 SNPs)",
      "group05" = "5 × 10–8 > P > 5 × 10–10 (1,624 SNPs)"
    )
  ) +
  # Bare look: no grid, no panel border, axis lines only, legend inside panel.
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    legend.position = c(0.7, 0.8)
  ) +
  # Breaks are given on the data scale (0.01-0.5) and labelled as percentages.
  scale_x_continuous(
    breaks = c(0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
    labels = c(1, 5, 10, 20, 30, 40, 50)
  ) +
  scale_y_continuous(
    breaks = c(-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3),
    limits = c(-0.3, 0.3)
  ) +
  # Draw the axis lines only between the first and last break.
  guides(
    x = guide_axis_truncated(trunc_lower = 0.01, trunc_upper = 0.5),
    y = guide_axis_truncated(trunc_lower = -0.3, trunc_upper = 0.3)
  ) +
  labs(
    x = "MAF (%) in cross-ancestry meta-analysis",
    y = "Joint effect sizes (s.d.) of minor alleles\nin cross-ancestry meta-analysis"
  ) +
  geom_hline(yintercept = 0, color = "gray") +
  # LOESS trend through the group01 rows with a negative effect.
  geom_smooth(
    data = new.fig1 %>%
      filter(group == "group01") %>%
      filter(`Join Effect of Minor Allele` < 0),
    aes(
      x = `Minor Allele Frequency`,
      y = `Join Effect of Minor Allele`
    ),
    method = "loess",
    formula = "y~x",
    se = FALSE,
    color = "gray",
    show.legend = FALSE
  ) +
  # LOESS trend through the group01 rows with a positive effect.
  geom_smooth(
    data = new.fig1 %>%
      filter(group == "group01") %>%
      filter(`Join Effect of Minor Allele` > 0),
    aes(
      x = `Minor Allele Frequency`,
      y = `Join Effect of Minor Allele`
    ),
    method = "loess",
    formula = "y~x",
    se = FALSE,
    color = "gray",
    show.legend = FALSE
  )

# Show the figure, as the previously unassigned ggplot() expression did.
p1
關于曲線不太清楚是用什么數據做的,這里直接自動添加擬合曲線
圖例里的文本上下標 出圖后再編輯吧
關于散點圖今天還新學到一個知識點是:散點圖的點如果非常多,如果輸出pdf文件的話,pdf文件會非常大,比如GWAS里常用的曼哈頓圖,這個pdf文件如果非常大后續如果想要編輯這個pdf文件會比較麻煩。
關于如何解決這個問題,看到一個討論群里有人討論,他們提到一個辦法是可以把散點柵格化 (柵格化是什么意思暫時不太明白)可以借助R包ggrastr
對應的github主頁是
https://github.com/VPetukhov/ggrastr
正好我們今天的推文內容是數據量比較多的散點圖,我們可以按照這個做法試試,這里參考微信公眾號推文 https://mp.weixin.qq.com/s/ou0cjD8dLMNaDLk588KSwQ
安裝ggrastr這個R包
# Install ggrastr only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("ggrastr", quietly = TRUE)) {
  install.packages("ggrastr")
}
如果要把點柵格化,只需要把對應的散點圖函數geom_point()
換成geom_point_rast()
library(ggrastr)

# Figure 1, rasterised version: same figure as the vector plot, but the point
# layer is drawn with ggrastr::geom_point_rast() so the points are embedded as
# a bitmap while axes, text and lines stay vector.
#
# Fix: the group01 legend label carried a stray "$)" suffix; it now reads the
# same as the label used for the non-rasterised figure.
p2 <- ggplot(
  data = new.fig1,
  aes(
    x = `Minor Allele Frequency`,
    y = `Join Effect of Minor Allele`,
    color = group
  )
) +
  # Rasterised points; legend keys drawn by ggh4x's rectangle key glyph.
  geom_point_rast(
    key_glyph = rectangle_key_glyph(
      color = color,
      fill = color,
      padding = margin(3, 3, 3, 3)
    ),
    size = 0.1,
    # Use the session-wide ggrastr dpi option when set, otherwise 300.
    raster.dpi = getOption("ggrastr.default.dpi", 300)
  ) +
  # Fixed colour and legend text per group; the exponents in the labels are
  # plain text here and are meant to be turned into superscripts afterwards.
  scale_color_manual(
    values = c(
      "group01" = "#ee82ee",
      "group02" = "#2e8b57",
      "group03" = "#1e90ff",
      "group04" = "#daa520",
      "group05" = "#cdc673"
    ),
    name = "",
    labels = c(
      "group01" = "P < 5 × 10–100 (672 SNPs)",
      "group02" = "5 × 10–50 > P > 5 × 10–100 (1,110 SNPs)",
      "group03" = "5 × 10–20 > P > 5 × 10–50 (3,513 SNPs)",
      "group04" = "5 × 10–10 > P > 5 × 10–20 (5,192 SNPs)",
      "group05" = "5 × 10–8 > P > 5 × 10–10 (1,624 SNPs)"
    )
  ) +
  # Bare look: no grid, no panel border, axis lines only, legend inside panel.
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    legend.position = c(0.7, 0.8)
  ) +
  # Breaks are given on the data scale (0.01-0.5) and labelled as percentages.
  scale_x_continuous(
    breaks = c(0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5),
    labels = c(1, 5, 10, 20, 30, 40, 50)
  ) +
  scale_y_continuous(
    breaks = c(-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3),
    limits = c(-0.3, 0.3)
  ) +
  # Draw the axis lines only between the first and last break.
  guides(
    x = guide_axis_truncated(trunc_lower = 0.01, trunc_upper = 0.5),
    y = guide_axis_truncated(trunc_lower = -0.3, trunc_upper = 0.3)
  ) +
  labs(
    x = "MAF (%) in cross-ancestry meta-analysis",
    y = "Joint effect sizes (s.d.) of minor alleles\nin cross-ancestry meta-analysis"
  ) +
  geom_hline(yintercept = 0, color = "gray") +
  # LOESS trend through the group01 rows with a negative effect.
  geom_smooth(
    data = new.fig1 %>%
      filter(group == "group01") %>%
      filter(`Join Effect of Minor Allele` < 0),
    aes(
      x = `Minor Allele Frequency`,
      y = `Join Effect of Minor Allele`
    ),
    method = "loess",
    formula = "y~x",
    se = FALSE,
    color = "gray",
    show.legend = FALSE
  ) +
  # LOESS trend through the group01 rows with a positive effect.
  geom_smooth(
    data = new.fig1 %>%
      filter(group == "group01") %>%
      filter(`Join Effect of Minor Allele` > 0),
    aes(
      x = `Minor Allele Frequency`,
      y = `Join Effect of Minor Allele`
    ),
    method = "loess",
    formula = "y~x",
    se = FALSE,
    color = "gray",
    show.legend = FALSE
  )
# Write each figure to its own 6 x 6 inch PDF in the working directory.
# print() is called explicitly: a bare `p1` / `p2` is only drawn through
# top-level auto-printing, so under source() or inside a function the device
# would be closed without anything drawn on it.
pdf("p1.pdf", width = 6, height = 6)
print(p1)
dev.off()

pdf("p2.pdf", width = 6, height = 6)
print(p2)
dev.off()
輸出的p2如果放大 點是會變模糊的
兩個文件的大小也不一樣,柵格化之前是700k,柵格化之后只有200k
示例數據和代碼可以給公眾號推文點贊,點擊在看,最后留言獲取
歡迎大家關注我的公眾號
小明的數據分析筆記本
小明的數據分析筆記本 公眾號 主要分享:1、R語言和python做數據分析和數據可視化的簡單小例子;2、園藝植物相關轉錄組學、基因組學、群體遺傳學文獻閱讀筆記;3、生物信息學入門學習資料及自己的學習筆記!