整體腳本如下蜘犁,需要你的文件包含CHR
BP
P
三列,無所謂順序瞎暑,也無所謂有啥其他列曙求,只要有這三列即可
library(data.table)
library(ggplot2)
library(dplyr)
dt = fread(infile)[, c("CHR", "BP", "P")]
# 1. 計算每個染色體的長度(BP最大值)
chr_lengths <- dt %>%
group_by(CHR) %>%
summarise(chr_len = max(BP))
# 2. 累計基因組位置
data <- dt %>%
arrange(CHR, BP) %>%
mutate(chr_cumsum = cumsum(c(0, head(chr_lengths$chr_len, -1)))[CHR]) %>%
mutate(BPcum = BP + chr_cumsum)
# 3. 計算每個染色體在x軸上的中間點,用來顯示染色體名稱
axis_set <- data %>%
group_by(CHR) %>%
summarise(center = (max(BPcum) + min(BPcum)) / 2)
# 4. 繪制曼哈頓圖
p = ggplot(data, aes(x=BPcum, y=-log10(P), color=factor(CHR))) +
geom_point(alpha=0.9) +
geom_hline(yintercept=-log10(5e-8), color="red", linetype="dashed") +
scale_color_manual(values=rep(c("#1B2C62", "#4695BC"), 22)) +
scale_x_continuous(label=axis_set$CHR,
breaks=axis_set$center,
expand=c(0, 0)) +
scale_y_continuous(expand=c(0, 0)) +
labs(x="Chromosome", y=expression(-log[10](P))) +
theme_classic() +
theme(legend.position="none",
axis.text.x = element_text(face="bold", size=20, color="black"),
axis.text.y = element_text(face="bold", size=20, color="black"),
axis.title.x = element_text(face="bold", size=20, margin=margin(t=10), color="black"),
axis.title.y = element_text(face="bold", size=20, margin=margin(t=10), color="black"),
axis.line = element_line(color="black"),
axis.ticks = element_line(color="black"),
axis.ticks.length = unit(0.3, "cm"))
ggsave("my.png", p, height = 8, width = 23, dpi=300)
結(jié)果如下
my.png