2021-11-16 ggtree包繪制進(jìn)化樹

余大神的力作之一:Guangchuang Yu (https://yulab-smu.top)。繪制出的進(jìn)化樹還是很專業(yè)美觀的岸蜗!
——電子書: Data Integration, Manipulation and Visualization of Phylogenetic Trees
——書包括4個(gè)部分:1)數(shù)據(jù)輸入、輸出和操作月劈。2)樹數(shù)據(jù)的可視化和注釋键畴。3)ggtree延展。4)其它葵蒂。

ggtree包的功能很多躏筏。這里摘抄幾個(gè)例子板丽,詳見原作網(wǎng)站。

1.加載包

library(treedataverse)
##  Attaching packages  treedataverse 0.0.1 

##  ape         5.5         treeio      1.18.0
##  dplyr       1.0.7       ggtree      3.2.0 
##  ggplot2     3.3.5       ggtreeExtra 1.4.0 
##  tidytree    0.3.6

2.Aligning graph to the tree based on the tree structure

library(ggtree)
library(TDbook)

## load `tree_nwk`, `df_info`, `df_alleles`, and `df_bar_data` from 'TDbook'
tree <- tree_nwk
snps <- df_alleles
snps_strainCols <- snps[1,] 
snps<-snps[-1,] # drop strain names
colnames(snps) <- snps_strainCols

gapChar <- "?"
snp <- t(snps)
lsnp <- apply(snp, 1, function(x) {
        x != snp[1,] & x != gapChar & snp[1,] != gapChar
    })
lsnp <- as.data.frame(lsnp)
lsnp$pos <- as.numeric(rownames(lsnp))
lsnp <- tidyr::gather(lsnp, name, value, -pos)
snp_data <- lsnp[lsnp$value, c("name", "pos")]

## visualize the tree 
p <- ggtree(tree) 

## attach the sampling information data set 
## and add symbols colored by location
p <- p %<+% df_info + geom_tippoint(aes(color=location))

## visualize SNP and Trait data using dot and bar charts,
## and align them based on tree structure
p + geom_facet(panel = "SNP", data = snp_data, geom = geom_point, 
               mapping=aes(x = pos, color = location), shape = '|') +
    geom_facet(panel = "Trait", data = df_bar_data, geom = geom_col, 
                aes(x = dummy_bar_value, color = location, 
                fill = location), orientation = 'y', width = .6) +
    theme_tree2(legend.position=c(.05, .85))
image.png

3.Visualize a tree with associated matrix

beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
beast_tree <- read.beast(beast_file)

genotype_file <- system.file("examples/Genotype.txt", package="ggtree")
genotype <- read.table(genotype_file, sep="\t", stringsAsFactor=F)
colnames(genotype) <- sub("\\.$", "", colnames(genotype))
p <- ggtree(beast_tree, mrsd="2013-01-01") + 
    geom_treescale(x=2008, y=1, offset=2) + 
    geom_tiplab(size=2)
gheatmap(p, genotype, offset=5, width=0.5, font.size=3, 
        colnames_angle=-45, hjust=0) +
    scale_fill_manual(breaks=c("HuH3N2", "pdm", "trig"), 
        values=c("steelblue", "firebrick", "darkgreen"), name="genotype")

p <- ggtree(beast_tree, mrsd="2013-01-01") + 
    geom_tiplab(size=2, align=TRUE, linesize=.5) + 
    theme_tree2()
gheatmap(p, genotype, offset=8, width=0.6, 
        colnames=FALSE, legend_title="genotype") +
    scale_x_ggtree() + 
    scale_y_continuous(expand=c(0, 0.3))
image.png

4.Visualize a tree with multiple associated matrices

nwk <- system.file("extdata", "sample.nwk", package="treeio")

tree <- read.tree(nwk)
circ <- ggtree(tree, layout = "circular")

df <- data.frame(first=c("a", "b", "a", "c", "d", "d", "a", 
                        "b", "e", "e", "f", "c", "f"),
                 second= c("z", "z", "z", "z", "y", "y", 
                        "y", "y", "x", "x", "x", "a", "a"))
rownames(df) <- tree$tip.label

df2 <- as.data.frame(matrix(rnorm(39), ncol=3))
rownames(df2) <- tree$tip.label
colnames(df2) <- LETTERS[1:3]


p1 <- gheatmap(circ, df, offset=.8, width=.2,
               colnames_angle=95, colnames_offset_y = .25) +
    scale_fill_viridis_d(option="D", name="discrete\nvalue")


library(ggnewscale)
p2 <- p1 + new_scale_fill()
gheatmap(p2, df2, offset=15, width=.3,
         colnames_angle=90, colnames_offset_y = .25) +
    scale_fill_viridis_c(option="A", name="continuous\nvalue")
mgheatmap-1.gif

5.Visualize a tree with multiple sequence alignment

library(TDbook)

# load `tree_seq_nwk` and `AA_sequence` from 'TDbook'
p <- ggtree(tree_seq_nwk) + geom_tiplab(size=3)
msaplot(p, AA_sequence, offset=3, width=2)
p <- ggtree(tree_seq_nwk, layout='circular') + 
    geom_tiplab(offset=4, align=TRUE) + xlim(NA, 12)
msaplot(p, AA_sequence, window=c(120, 200))
image.png

6. Composite plots

library(ggplot2)
library(ggtree)

set.seed(2019-10-31)
tr <- rtree(10)

d1 <- data.frame(
    # only some labels match
    label = c(tr$tip.label[sample(5, 5)], "A"),
    value = sample(1:6, 6))

d2 <- data.frame(
    label = rep(tr$tip.label, 5),
    category = rep(LETTERS[1:5], each=10),
    value = rnorm(50, 0, 3)) 

g <- ggtree(tr) + geom_tiplab(align=TRUE)

p1 <- ggplot(d1, aes(label, value)) + geom_col(aes(fill=label)) + 
    geom_text(aes(label=label, y= value+.1)) +
    coord_flip() + theme_tree2() + theme(legend.position='none')
 
p2 <- ggplot(d2, aes(x=category, y=label)) + 
    geom_tile(aes(fill=value)) + scale_fill_viridis_c() + 
    theme_minimal() + xlab(NULL) + ylab(NULL)
composite-1.gif

7.The phylo4 and phylo4d objects

library(phylobase)
data(geospiza_raw)
g1 <- as(geospiza_raw$tree, "phylo4")
g2 <- phylo4d(g1, geospiza_raw$data, missing.data="warn")

d1 <- data.frame(x = seq(1.1, 2, length.out = 5),
                lab = names(geospiza_raw$data))

p1 <- ggtree(g2) + geom_tippoint(aes(size = wingL), x = d1$x[1], shape = 1) + 
    geom_tippoint(aes(size = tarsusL), x = d1$x[2], shape = 1) + 
    geom_tippoint(aes(size = culmenL), x = d1$x[3], shape = 1) + 
    geom_tippoint(aes(size = beakD),   x = d1$x[4], shape = 1) + 
    geom_tippoint(aes(size = gonysW),  x = d1$x[5], shape = 1) + 
    scale_size_continuous(range = c(3,12), name="") + 
    geom_text(aes(x = x, y = 0, label = lab), data = d1, angle = 45) +
    geom_tiplab(offset = 1.3) + xlim(0, 3) +
    theme(legend.position = c(.1, .75))  

## users can use `as.treedata(g2)` to convert `g2` to a `treedata` object
## and use `get_tree_data()` function to extract the associated data 

p2 <- gheatmap(ggtree(g1), data=geospiza_raw$data, colnames_angle=45) + 
  geom_tiplab(offset=1) + hexpand(.2) + theme(legend.position = c(.1, .75))

aplot::plot_list(p1, p2, ncol=2, tag_levels='A')    
image.png

8.ggtree for other tree-like structures

library(treeio)
library(ggplot2)
library(ggtree)

data("GNI2014", package="treemap")
n <- GNI2014[, c(3,1)]
n[,1] <- as.character(n[,1])
n[,1] <- gsub("\\s\\(.*\\)", "", n[,1])

w <- cbind("World", as.character(unique(n[,1])))

colnames(w) <- colnames(n)
edgelist <- rbind(n, w)

y <- as.phylo(edgelist)
ggtree(y, layout='circular') %<+% GNI2014 + 
    aes(color=continent) + geom_tippoint(aes(size=population), alpha=.6) + 
    geom_tiplab(aes(label=country), offset=.1) +
    theme(plot.margin=margin(60,60,60,60))
image.png

9.Visualizing pairwise nucleotide sequence distance with a phylogenetic tree

library(TDbook)
library(tibble)
library(tidyr)
library(Biostrings)
library(treeio)
library(ggplot2)
library(ggtree)

# loaded from TDbook package
tree <- tree_HPV58

clade <- c(A3 = 92, A1 = 94, A2 = 108, B1 = 156, 
            B2 = 159, C = 163, D1 = 173, D2 = 176)
tree <- groupClade(tree, clade)
cols <- c(A1 = "#EC762F", A2 = "#CA6629", A3 = "#894418", B1 = "#0923FA", 
         B2 = "#020D87", C = "#000000", D1 = "#9ACD32",D2 = "#08630A")

## visualize the tree with tip labels and tree scale
p <- ggtree(tree, aes(color = group), ladderize = FALSE) %>% 
    rotate(rootnode(tree)) + 
    geom_tiplab(aes(label = paste0("italic('", label, "')")), 
                parse = TRUE, size = 2.5) +
    geom_treescale(x = 0, y = 1, width = 0.002) + 
    scale_color_manual(values = c(cols, "black"), 
                na.value = "black", name = "Lineage",
                breaks = c("A1", "A2", "A3", "B1", "B2", "C", "D1", "D2")) +
    guides(color = guide_legend(override.aes = list(size = 5, shape = 15))) +
    theme_tree2(legend.position = c(.1, .88))
## Optional
## add labels for monophyletic (A, C and D) and paraphyletic (B) groups 
dat <- tibble(node = c(94, 108, 131, 92, 156, 159, 163, 173, 176,172),
              name = c("A1", "A2", "A3", "A", "B1", 
                        "B2", "C", "D1", "D2", "D"),
              offset = c(0.003, 0.003, 0.003, 0.00315, 0.003, 
                        0.003, 0.0031, 0.003, 0.003, 0.00315),
              offset.text = c(-.001, -.001, -.001, 0.0002, -.001, 
                        -.001, 0.0002, -.001, -.001, 0.0002),
              barsize = c(1.2, 1.2, 1.2, 2, 1.2, 1.2, 3.2, 1.2, 1.2, 2),
              extend = list(c(0, 0.5), 0.5, c(0.5, 0), 0, c(0, 0.5), 
                        c(0.5, 0), 0, c(0, 0.5), c(0.5, 0), 0)
            ) %>% 
       dplyr::group_split(barsize)

p <- p +
     geom_cladelab(
         data = dat[[1]],
         mapping = aes(
             node = node,
             label = name,
             color = group,
             offset = offset,
             offset.text = offset.text,
             extend = extend
         ),
         barsize = 1.2,
         fontface = 3,
         align = TRUE
     ) +
     geom_cladelab(
         data = dat[[2]],
         mapping = aes(
             node = node,
             label = name,
             offset = offset,
             offset.text =offset.text,
             extend = extend
         ),
         barcolor = "darkgrey",
         textcolor = "darkgrey",
         barsize = 2,
         fontsize = 5,
         fontface = 3,
         align = TRUE
     ) +
     geom_cladelab(
         data = dat[[3]],
         mapping = aes(
             node = node,
             label = name,
             offset = offset,
             offset.text = offset.text,
             extend = extend
         ),
         barcolor = "darkgrey",
         textcolor = "darkgrey",
         barsize = 3.2,
         fontsize = 5,
         fontface = 3,
         align = TRUE
     ) +
     geom_strip(65, 71, "italic(B)", color = "darkgrey", 
                offset = 0.00315, align = TRUE, offset.text = 0.0002, 
                barsize = 2, fontsize = 5, parse = TRUE)

## Optional
## display support values
p <- p + geom_nodelab(aes(subset = (node == 92), label = "*"), 
                    color = "black", nudge_x = -.001, nudge_y = 1) +
    geom_nodelab(aes(subset = (node == 155), label = "*"), 
                    color = "black", nudge_x = -.0003, nudge_y = -1) +
    geom_nodelab(aes(subset = (node == 158), label = "95/92/1.00"), 
                    color = "black", nudge_x = -0.0001, 
                    nudge_y = -1, hjust = 1) +
    geom_nodelab(aes(subset = (node == 162), label = "98/97/1.00"), 
                    color = "black", nudge_x = -0.0001, 
                    nudge_y = -1, hjust = 1) +
    geom_nodelab(aes(subset = (node == 172), label = "*"), 
                    color = "black", nudge_x = -.0003, nudge_y = -1) 
## extract accession numbers from tip labels
tl <- tree$tip.label
acc <- sub("\\w+\\|", "", tl)
names(tl) <- acc

## read sequences from GenBank directly into R
## and convert the object to DNAStringSet
tipseq <- ape::read.GenBank(acc) %>% as.character %>% 
    lapply(., paste0, collapse = "") %>% unlist %>% 
    DNAStringSet
## align the sequences using muscle
tipseq_aln <- muscle::muscle(tipseq)
tipseq_aln <- DNAStringSet(tipseq_aln)
## calculate pairwise hamming distances among sequences
tipseq_dist <- stringDist(tipseq_aln, method = "hamming")

## calculate the percentage of differences
tipseq_d <- as.matrix(tipseq_dist) / width(tipseq_aln[1]) * 100

## convert the matrix to a tidy data frame for facet_plot
dd <- as_tibble(tipseq_d)
dd$seq1 <- rownames(tipseq_d)
td <- gather(dd,seq2, dist, -seq1)
td$seq1 <- tl[td$seq1]
td$seq2 <- tl[td$seq2]

g <- p$data$group
names(g) <- p$data$label
td$clade <- g[td$seq2] 

## visualize the sequence differences using dot plot and line plot
## and align the sequence difference plot to the tree using facet_plot
p2 <- p + geom_facet(panel = "Sequence Distance", 
            data = td, geom = geom_point, alpha = .6, 
            mapping = aes(x = dist, color = clade, shape = clade)) +
    geom_facet(panel = "Sequence Distance", 
            data = td, geom = geom_path, alpha = .6, 
            mapping=aes(x = dist, group = seq2, color = clade)) + 
    scale_shape_manual(values = 1:8, guide = FALSE) 

print(p2)
image.png

10.Highlighting different groups.

library(TDbook)
mytree <- tree_treenwk_30.4.19

# Define nodes for coloring later on
tiplab <- mytree$tip.label
cls <- tiplab[grep("^ch", tiplab)] 
labeltree <- groupOTU(mytree, cls)

p <- ggtree(labeltree, aes(color=group, linetype=group), layout="circular") +
    scale_color_manual(values = c("#efad29", "#63bbd4")) +
    geom_nodepoint(color="black", size=0.1) +
    geom_tiplab(size=2, color="black")

p2 <- flip(p, 136, 110) %>% 
    flip(141, 145) %>% 
    rotate(141) %>% 
    rotate(142) %>% 
    rotate(160) %>% 
    rotate(164) %>% 
    rotate(131)

### Group V and II coloring 
dat <- data.frame(
           node = c(110, 88, 156,136),
           fill = c("#229f8a", "#229f8a", "#229f8a", "#f9311f")
       )
p3 <- p2 +
      geom_hilight(
          data = dat,
          mapping = aes(
              node = node,
              fill = I(fill)
          ),
          alpha = 0.2,
          extendto = 1.4
      )

### Putting on a label on the avian specific expansion 
p4 <- p3 +
      geom_cladelab(
          node = 113,
          label = "Avian-specific expansion",
          align = TRUE,
          angle = -35,
          offset.text = 0.05,
          hjust = "center",
          fontsize = 2,
          offset = .2,
          barsize = .2
      )

### Adding the bootstrap values with subset used to remove all bootstraps < 50  
p5 <- p4 +
      geom_nodelab(
          mapping = aes(
              x = branch,
              label = label,
              subset = !is.na(as.numeric(label)) & as.numeric(label) > 50
          ),
          size = 2,
          color = "black",
          nudge_y = 0.6
      )

### Putting labels on the subgroups 
p6 <- p5 +
      geom_cladelab(
          data = data.frame(
              node = c(114, 121),
              name = c("Subgroup A", "Subgroup B")
          ),
          mapping = aes(
              node = node,
              label = name
          ),
          align = TRUE,
          offset = .05,
          offset.text = .03,
          hjust = "center",
          barsize = .2,
          fontsize = 2,
          angle = "auto",
          horizontal = FALSE
      ) +
      theme(
          legend.position = "none",
          plot.margin = grid::unit(c(-15, -15, -15, -15), "mm")
      )
print(p6)
image.png

11.Phylogenetic tree with genome locus structure

library(dplyr)
library(ggplot2)
library(gggenes)
library(ggtree)

get_genes <- function(data, genome) {
    filter(data, molecule == genome) %>% pull(gene)
}

g <- unique(example_genes[,1])
n <- length(g)
d <- matrix(nrow = n, ncol = n)
rownames(d) <- colnames(d) <- g
genes <- lapply(g, get_genes, data = example_genes)

for (i in 1:n) {
    for (j in 1:i) {
        jaccard_sim <- length(intersect(genes[[i]], genes[[j]])) / 
                       length(union(genes[[i]], genes[[j]]))
        d[j, i] <- d[i, j] <- 1 - jaccard_sim
    }
}

tree <- ape::bionj(d) 

p <- ggtree(tree, branch.length='none') + 
    geom_tiplab() + xlim_tree(5.5) + 
    geom_facet(mapping = aes(xmin = start, xmax = end, fill = gene),
               data = example_genes, geom = geom_motif, panel = 'Alignment',
               on = 'genE', label = 'gene', align = 'left') +
    scale_fill_brewer(palette = "Set3") + 
    scale_x_continuous(expand=c(0,0)) +
    theme(strip.text=element_blank(),
        panel.spacing=unit(0, 'cm'))

facet_widths(p, widths=c(1,2))
image.png
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
  • 序言:七十年代末趁尼,一起剝皮案震驚了整個(gè)濱河市埃碱,隨后出現(xiàn)的幾起案子,更是在濱河造成了極大的恐慌酥泞,老刑警劉巖砚殿,帶你破解...
    沈念sama閱讀 206,839評論 6 482
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件,死亡現(xiàn)場離奇詭異芝囤,居然都是意外死亡似炎,警方通過查閱死者的電腦和手機(jī)辛萍,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 88,543評論 2 382
  • 文/潘曉璐 我一進(jìn)店門,熙熙樓的掌柜王于貴愁眉苦臉地迎上來羡藐,“玉大人贩毕,你說我怎么就攤上這事∑袜拢” “怎么了辉阶?”我有些...
    開封第一講書人閱讀 153,116評論 0 344
  • 文/不壞的土叔 我叫張陵,是天一觀的道長瘩扼。 經(jīng)常有香客問我谆甜,道長,這世上最難降的妖魔是什么集绰? 我笑而不...
    開封第一講書人閱讀 55,371評論 1 279
  • 正文 為了忘掉前任规辱,我火速辦了婚禮,結(jié)果婚禮上栽燕,老公的妹妹穿的比我還像新娘罕袋。我一直安慰自己,他們只是感情好碍岔,可當(dāng)我...
    茶點(diǎn)故事閱讀 64,384評論 5 374
  • 文/花漫 我一把揭開白布炫贤。 她就那樣靜靜地躺著,像睡著了一般付秕。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上侍郭,一...
    開封第一講書人閱讀 49,111評論 1 285
  • 那天询吴,我揣著相機(jī)與錄音,去河邊找鬼亮元。 笑死猛计,一個(gè)胖子當(dāng)著我的面吹牛,可吹牛的內(nèi)容都是我干的爆捞。 我是一名探鬼主播奉瘤,決...
    沈念sama閱讀 38,416評論 3 400
  • 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼煮甥!你這毒婦竟也來了盗温?” 一聲冷哼從身側(cè)響起,我...
    開封第一講書人閱讀 37,053評論 0 259
  • 序言:老撾萬榮一對情侶失蹤成肘,失蹤者是張志新(化名)和其女友劉穎卖局,沒想到半個(gè)月后,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體双霍,經(jīng)...
    沈念sama閱讀 43,558評論 1 300
  • 正文 獨(dú)居荒郊野嶺守林人離奇死亡砚偶,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點(diǎn)故事閱讀 36,007評論 2 325
  • 正文 我和宋清朗相戀三年批销,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片染坯。...
    茶點(diǎn)故事閱讀 38,117評論 1 334
  • 序言:一個(gè)原本活蹦亂跳的男人離奇死亡均芽,死狀恐怖,靈堂內(nèi)的尸體忽然破棺而出单鹿,到底是詐尸還是另有隱情掀宋,我是刑警寧澤,帶...
    沈念sama閱讀 33,756評論 4 324
  • 正文 年R本政府宣布羞反,位于F島的核電站布朦,受9級特大地震影響,放射性物質(zhì)發(fā)生泄漏昼窗。R本人自食惡果不足惜是趴,卻給世界環(huán)境...
    茶點(diǎn)故事閱讀 39,324評論 3 307
  • 文/蒙蒙 一、第九天 我趴在偏房一處隱蔽的房頂上張望澄惊。 院中可真熱鬧唆途,春花似錦、人聲如沸掸驱。這莊子的主人今日做“春日...
    開封第一講書人閱讀 30,315評論 0 19
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽毕贼。三九已至温赔,卻和暖如春,著一層夾襖步出監(jiān)牢的瞬間鬼癣,已是汗流浹背陶贼。 一陣腳步聲響...
    開封第一講書人閱讀 31,539評論 1 262
  • 我被黑心中介騙來泰國打工, 沒想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留待秃,地道東北人拜秧。 一個(gè)月前我還...
    沈念sama閱讀 45,578評論 2 355
  • 正文 我出身青樓,卻偏偏與公主長得像章郁,于是被迫代替她去往敵國和親枉氮。 傳聞我的和親對象是個(gè)殘疾皇子,可洞房花燭夜當(dāng)晚...
    茶點(diǎn)故事閱讀 42,877評論 2 345

推薦閱讀更多精彩內(nèi)容