這幾年器赞,我也陸陸續(xù)續(xù)在paper中看到一些環(huán)形的富集圖垢袱,有時候也挺好的,今天我們也來學習一下畫法港柜,繼續(xù)練習circlize的技巧惶桐。
這次,我找了一個經常做的GO富集的結果拿來做測試。
circlize包提供了一些專門的基因組繪圖函數(shù)姚糊,讓基因組分析更加簡單方便,如:
circos.genomicTrack(): 添加軌跡和圖形
circos.genomicPoints(): 添加點
circos.genomicLines(): 添加線條或線段
circos.genomicRect(): 添加矩形
circos.genomicText(): 添加文本
circos.genomicLink(): 添加連接
這樣函數(shù)與基礎的繪制函數(shù)是類似的授舟,只是接受的輸入數(shù)據(jù)格式不同救恨,都是基于基礎的circlize繪圖函數(shù)實現(xiàn)的(如circos.track(),circos.points()等)。
library(grid)
library(graphics)
library(ComplexHeatmap)
data <- read.table("data.txt",header = T,sep="\t")
data_new <- data[,c(2,1,4,5,6)]
# 通路總基因數(shù):min -- 0释树、 max -- 生物的總基因數(shù)肠槽、rich表示該通路中的基因數(shù);
data_new$gene_num.min <- 0
# -log10 p值
data_new$"-log10Pvalue" <- -log10(data$pvalue)
#data_new$up.regulated <- round(data$GeneNum*data$UpRatio)
#data_new$down.regulated <- data_new$GeneNum-data_new$up.regulated
data_new$up.regulated? <-round(data$UpRatio,2)
data_new$down.regulated? <-round(data$DownRatio,2)
colnames(data_new)[c(1,2,3,4)] <- c("id", "category","gene_num.rich","gene_num.max")
id奢啥,富集GO的ID秸仙;
category,富集通路所屬的高級分類桩盲;
gene_num.min和gene_num.max寂纪,gene_num.min均為0,gene_num.max為目標通路中所含基因(背景基因)總數(shù)目赌结;
gene_num.rich捞蛋,富集到目標通路的基因數(shù)量,也就是差異基因在這個GO上的數(shù)目柬姚;
-log10Pvalue拟杉,富集分析的p值,已做了-log10轉換處理量承;
up.regulated和down.regulated搬设,富集到該通路中的基因中,顯著上調和下調基因的數(shù)量比例撕捍;
Ratio拿穴,各個通路的富集因子,或者富集得分卦洽,已標準化至[0,1]區(qū)間贞言。
然后我們BP,MF,CC各取了top6的來顯示。
dat<- bind_rows(
data_new %>%filter(category == 'BP') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'MF') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'CC') %>%arrange('-log10Pvalue') %>% head(6),
)
dat$id <- factor(dat$id, levels = dat$id)? #變成因子阀蒂,是可以按照我們先要的順序來顯示
rownames(dat)<-dat$id
首先開始繪制该窗,第一個圈,也就是表征GO ID的圈蚤霞,圈的大小有gene_num.max來決定酗失。
# 第一個圈:繪制id
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max')]
ko_color <- c(rep('#F7CC13',6), rep('#954572',6), rep('#0796E0',6)) #各分類的顏色和數(shù)目
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)? ?
circos.track(
? ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,?
? panel.fun = function(x, y) {
? ? ylim = get.cell.meta.data('ycenter')?
? ? xlim = get.cell.meta.data('xcenter')
? ? sector.name = get.cell.meta.data('sector.index')?
? ? circos.axis(h = 'top', labels.cex = 0.4, labels.niceFacing = FALSE)
? ? circos.text(xlim, ylim, sector.name, cex = 0.6,col="white",niceFacing = FALSE)?
? } )
# 第二圈,繪制富集的基因和富集p值
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
plot_data$gene_num.rich[16] <- 901? # 為了顯示的更清晰昧绣,手動改了3個值
plot_data$gene_num.rich[11] <- 1023
plot_data$gene_num.rich[3] <- 1201
label_data <- dat['gene_num.rich']?
p_max <- round(max(dat$'-log10Pvalue')) + 1?
colorsChoice <- colorRampPalette(c('white', 'blue'))?
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
? plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,??#圈圖的高度规肴、顏色等設置
? panel.fun = function(region, value, ...) {
? ? circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)???#區(qū)塊的長度反映了富集基因的數(shù)量,顏色與?p?值有關
? ? ylim = get.cell.meta.data('ycenter')?
? ? xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
? ? sector.name = label_data[get.cell.meta.data('sector.index'),1]? #ylim、xlim拖刃、sector.name?等用于指定文字標簽(富集基因數(shù)量)添加的合適坐標
? ? circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)??#將文字標簽添(富集基因數(shù)量)加在圖中指定位置處
? } )
# 第三圈删壮,繪制上下調基因的比例
dat$up <- dat$up.regulated * dat$gene_num.max
plot_data_up <- dat[c('id', 'gene_num.min', 'up')]
names(plot_data_up) <- c('id', 'start', 'end')
plot_data_up$type <- 1?
dat$down <- dat$down.regulated * dat$gene_num.max + dat$up
plot_data_down <- dat[c('id', 'up', 'down')]
names(plot_data_down) <- c('id', 'start', 'end')
plot_data_down$type <- 2?
#選擇作圖數(shù)據(jù)集(作圖用)、標簽數(shù)據(jù)集(添加相應的文字標識用)兑牡,并分別為上下調基因賦值不同顏色
plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c('up', 'down', 'up.regulated', 'down.regulated')]
color_assign <- colorRamp2(breaks = c(1, 2), col = c('red', 'blue'))
circos.genomicTrackPlotRegion(
? plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
? panel.fun = function(region, value, ...) {
? ? circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)?
? ? ylim = get.cell.meta.data('cell.bottom.radius') - 0.5
? ? xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
? ? sector.name = label_data[get.cell.meta.data('sector.index'),3]
? ? circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)??#將文字標簽(上調基因比例)添加在圖中指定位置處
? ? xlim = (label_data[get.cell.meta.data('sector.index'),2]+label_data[get.cell.meta.data('sector.index'),1]) / 2
? ? sector.name = label_data[get.cell.meta.data('sector.index'),4]
? ? circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)??#將下調基因比例的標簽也添加在圖中
? } )
# 第四圈央碟,繪制富集因子
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max', 'Ratio')]
plot_data$Ratio <- plot_data$Ratio *10
label_data <- dat['category']?
color_assign <- c('BP' = '#F7CC13', 'CC' = '#954572', 'MF' = '#0796E0')#各二級分類的名稱和顏色
circos.genomicTrack(
? plot_data, ylim = c(0, 1), track.height = 0.3, bg.col = 'gray95', bg.border = NA,?
? panel.fun = function(region, value, ...) {
? ? sector.name = get.cell.meta.data('sector.index')?? #sector.name 用于提取 GO id 名稱,并添加在下一句中匹配 GO對應的高級分類均函,以分配顏色
? ? circos.genomicRect(region, value, col = color_assign[label_data[sector.name,1]], border = NA, ytop.column = 1, ybottom = 0, ...)??#繪制矩形區(qū)塊亿虽,高度代表富集因子數(shù)值,顏色代表 GO的分類
? ? circos.lines(c(0, max(region)), c(0.5, 0.5), col = 'gray', lwd = 0.3)??#在富集因子等于 0.5 的位置處添加一個灰線
? } )
下面我們來添加legend苞也。但是洛勉,circlize包繪制的圈圖是沒有圖例的。若有需要如迟,您可以選擇用AI收毫、PS等工具手動繪制圖例,也可以借助其它一些R包實現(xiàn)氓涣,例如ComplexHeatmap包牛哺。
是可以直接產生legend,可以AI添加劳吠,也可以直接添加到圖片上引润。
category_legend <- Legend(
? title="Category",
? labels = c('BP', 'CC', 'MF'),#各二級分類的名稱
? type = 'points', pch = NA, background = c('#F7CC13', '#954572', '#0796E0'), #各二級分類的顏色
? labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
? nr=1,
? title_gp = gpar(fontsize = 9),title_position = 'topleft')
updown_legend <- Legend(
? title="Differential expressed",
? labels = c('Up-regulated', 'Down-regulated'),
? type = 'points', pch = NA, background = c('red', 'blue'),
? labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
? #nr=1,
? title_gp = gpar(fontsize = 9),title_position = 'topleft')
pvalue_legend <- Legend(
? title = '-Log10(Pvalue)',
? col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
? ? ? ? ? ? ? ? ? ? ? colorRampPalette(c('#FF906F', '#861D30'))(6)),
? legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
? direction = "horizontal",
? title_gp = gpar(fontsize = 9), title_position = 'topleft')
lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend)? #里面有很多參數(shù)可以調整幾個圖例的排版
#grid.draw(lgd_list_vertical)
draw(lgd_list_vertical)
總的代碼如下:
library(circlize)
library(grid)
library(graphics)
library(ComplexHeatmap)
data <- read.table("data.txt",header = T,sep="\t")
data_new <- data[,c(2,1,4,5,6)]
# 通路總基因數(shù):min -- 0、 max -- 生物的總基因數(shù)
data_new$gene_num.min <- 0
# -log10 p值
data_new$"-log10Pvalue" <- -log10(data$pvalue)
#data_new$up.regulated <- round(data$GeneNum*data$UpRatio)
#data_new$down.regulated <- data_new$GeneNum-data_new$up.regulated
data_new$up.regulated? <-round(data$UpRatio,2)
data_new$down.regulated? <-round(data$DownRatio,2)
colnames(data_new)[c(1,2,3,4)] <- c("id", "category","gene_num.rich","gene_num.max")
dat<- bind_rows(
data_new %>%filter(category == 'BP') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'MF') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'CC') %>%arrange('-log10Pvalue') %>% head(6),
)
dat$id <- factor(dat$id, levels = dat$id)
rownames(dat)<-dat$id
pdf('circlize.pdf', width = 12, height = 15)
# 第一個圈:繪制id
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max')]
ko_color <- c(rep('#F7CC13',6), rep('#954572',6), rep('#0796E0',6)) #各二級分類的顏色和數(shù)目
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
? ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,?
? panel.fun = function(x, y) {
? ? ylim = get.cell.meta.data('ycenter')?
? ? xlim = get.cell.meta.data('xcenter')
? ? sector.name = get.cell.meta.data('sector.index')?
? ? circos.axis(h = 'top', labels.cex = 0.4, labels.niceFacing = FALSE)
? ? circos.text(xlim, ylim, sector.name, cex = 0.6,col="white",niceFacing = FALSE)?
? } )
# 第二圈痒玩,繪制富集的基因和富集p值
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
plot_data$gene_num.rich[16] <- 901? # 為了顯示的更清晰淳附,手動改了3個值
plot_data$gene_num.rich[11] <- 1023
plot_data$gene_num.rich[3] <- 1201
label_data <- dat['gene_num.rich']?
p_max <- round(max(dat$'-log10Pvalue')) + 1?
#colorsChoice <- colorRampPalette(c('#FF906F', '#861D30'))?
colorsChoice <- colorRampPalette(c('white', 'blue'))?
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
? plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,?
? panel.fun = function(region, value, ...) {
? ? circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)?
? ? ylim = get.cell.meta.data('ycenter')?
? ? xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
? ? sector.name = label_data[get.cell.meta.data('sector.index'),1]
? ? circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)?
? } )
# 第三圈,繪制上下調基因
dat$up <- dat$up.regulated * dat$gene_num.max
plot_data_up <- dat[c('id', 'gene_num.min', 'up')]
names(plot_data_up) <- c('id', 'start', 'end')
plot_data_up$type <- 1?
dat$down <- dat$down.regulated * dat$gene_num.max + dat$up
plot_data_down <- dat[c('id', 'up', 'down')]
names(plot_data_down) <- c('id', 'start', 'end')
plot_data_down$type <- 2?
plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c('up', 'down', 'up.regulated', 'down.regulated')]
color_assign <- colorRamp2(breaks = c(1, 2), col = c('red', 'blue'))
circos.genomicTrackPlotRegion(
? plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
? panel.fun = function(region, value, ...) {
? ? circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)?
? ? ylim = get.cell.meta.data('cell.bottom.radius') - 0.5
? ? xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
? ? sector.name = label_data[get.cell.meta.data('sector.index'),3]
? ? circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)?
? ? xlim = (label_data[get.cell.meta.data('sector.index'),2]+label_data[get.cell.meta.data('sector.index'),1]) / 2
? ? sector.name = label_data[get.cell.meta.data('sector.index'),4]
? ? circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)?
? } )
# 第四圈蠢古,繪制富集因子
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max', 'Ratio')]
plot_data$Ratio <- plot_data$Ratio *10
label_data <- dat['category']?
color_assign <- c('BP' = '#F7CC13', 'CC' = '#954572', 'MF' = '#0796E0')#各二級分類的名稱和顏色
circos.genomicTrack(
? plot_data, ylim = c(0, 1), track.height = 0.3, bg.col = 'gray95', bg.border = NA,?
? panel.fun = function(region, value, ...) {
? ? sector.name = get.cell.meta.data('sector.index')?
? ? circos.genomicRect(region, value, col = color_assign[label_data[sector.name,1]], border = NA, ytop.column = 1, ybottom = 0, ...)?
? ? circos.lines(c(0, max(region)), c(0.5, 0.5), col = 'gray', lwd = 0.3)?
? } )
category_legend <- Legend(
? title="Category",
? labels = c('BP', 'CC', 'MF'),#各二級分類的名稱
? type = 'points', pch = NA, background = c('#F7CC13', '#954572', '#0796E0'), #各二級分類的顏色
? labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
? nr=1,
? title_gp = gpar(fontsize = 9),title_position = 'topleft')
updown_legend <- Legend(
? title="Differential expressed",
? labels = c('Up-regulated', 'Down-regulated'),
? type = 'points', pch = NA, background = c('red', 'blue'),
? labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
? #nr=1,
? title_gp = gpar(fontsize = 9),title_position = 'topleft')
pvalue_legend <- Legend(
? title = '-Log10(Pvalue)',
? col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
? ? ? ? ? ? ? ? ? ? ? colorRampPalette(c('#FF906F', '#861D30'))(6)),
? legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
? direction = "horizontal",
? title_gp = gpar(fontsize = 9), title_position = 'topleft')
lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend)
#grid.draw(lgd_list_vertical)
draw(lgd_list_vertical)
dev.off()