數(shù)據(jù)下載
官網(wǎng)訪問有障礙,直接去xena吧。
1.讀取表達(dá)矩陣
rm(list = ls())
dat = data.table::fread("gtex_RSEM_gene_tpm.gz",data.table = F)
dat[1:4,1:4]
## sample GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## 1 ENSG00000242268.2 -3.4580 -9.9658
## 2 ENSG00000259041.1 -9.9658 -9.9658
## 3 ENSG00000270112.3 -3.6259 -2.1779
## 4 ENSG00000167578.16 4.5988 4.6294
## GTEX-13QIC-0011-R1a-SM-5O9CJ
## 1 -9.9658
## 2 -9.9658
## 3 -1.8314
## 4 6.4989
library(tidyverse)
exp = column_to_rownames(dat,"sample") %>% as.matrix()
rownames(exp) = rownames(exp) %>% str_split("\\.",simplify = T) %>% .[,1]
exp[1:4,1:4]
## GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## ENSG00000242268 -3.4580 -9.9658
## ENSG00000259041 -9.9658 -9.9658
## ENSG00000270112 -3.6259 -2.1779
## ENSG00000167578 4.5988 4.6294
## GTEX-13QIC-0011-R1a-SM-5O9CJ GTEX-ZPCL-0126-SM-4WWC8
## ENSG00000242268 -9.9658 -9.9658
## ENSG00000259041 -9.9658 -9.9658
## ENSG00000270112 -1.8314 -9.9658
## ENSG00000167578 6.4989 5.5358
# 轉(zhuǎn)換行名
library(AnnoProbe)
library(tinyarray)
an = annoGene(rownames(exp),ID_type = "ENSEMBL")
exp = trans_array(exp,ids = an,from = "ENSEMBL",to = "SYMBOL")
exp[1:4,1:4]
## GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## DDX11L1 -5.0116 -9.9658
## WASH7P 4.4283 3.8370
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
## GTEX-13QIC-0011-R1a-SM-5O9CJ GTEX-ZPCL-0126-SM-4WWC8
## DDX11L1 -9.9658 -9.9658
## WASH7P 3.5863 3.6793
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
2. 讀取注釋信息
clinical = data.table::fread("GTEX_phenotype.gz")
table(clinical$`_primary_site`)
##
## <not provided> Adipose Tissue Adrenal Gland Bladder Blood
## 5 621 161 13 595
## Blood Vessel Bone Marrow Brain Breast Cervix Uteri
## 753 102 1426 221 11
## Colon Esophagus Fallopian Tube Heart Kidney
## 384 805 7 493 38
## Liver Lung Muscle Nerve Ovary
## 141 381 478 335 112
## Pancreas Pituitary Prostate Salivary Gland Skin
## 203 126 122 71 977
## Small Intestine Spleen Stomach Testis Thyroid
## 106 121 209 208 366
## Uterus Vagina
## 93 99
clinical = clinical[clinical$`_primary_site`!="<not provided>",]
colnames(clinical)[3] = "site"
3.表達(dá)矩陣和臨床信息對(duì)應(yīng)起來
s = intersect(colnames(exp),clinical$Sample)
clinical = clinical[match(s,clinical$Sample),]
exp = exp[,s]
identical(clinical$Sample,colnames(exp))
## [1] TRUE
exp[1:4,1:4]
## GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## DDX11L1 -5.0116 -9.9658
## WASH7P 4.4283 3.8370
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
## GTEX-13QIC-0011-R1a-SM-5O9CJ GTEX-ZPCL-0126-SM-4WWC8
## DDX11L1 -9.9658 -9.9658
## WASH7P 3.5863 3.6793
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
4. 單基因表達(dá)量畫圖
library(dplyr)
#"METTL3","SETD2","TP53"
g = "METTL3"
pdat = cbind(gene = exp[g,],clinical[,c(1,3)])
library(tidyr)
pdat = drop_na(pdat,site)
su = group_by(pdat,site) %>%
summarise(a = median(gene)) %>%
arrange(desc(a))
pdat$site = factor(pdat$site,levels = su$site)
library(ggplot2)
library(RColorBrewer)
mypalette <- colorRampPalette(brewer.pal(8,"Set1"))
ggplot(pdat,aes(x = site,y = gene,fill = site))+
geom_boxplot()+
theme_bw()+
theme(axis.text.x = element_text(vjust = 1,hjust = 1,angle = 70),legend.position = "bottom")+
scale_fill_manual(values = mypalette(31))+
guides (fill=guide_legend (nrow=3, byrow=TRUE))
image.png
5.兩個(gè)基因的相關(guān)性圖
g2 = "SETD2"
pdat2 = cbind(cbind(gene1 = exp[g,],
gene2 = exp[g2,],
clinical[,c(1,3)]))
pdat2 = drop_na(pdat2,site)
k = count(pdat2,site) %>%
filter(n>2)
pdat2 = filter(pdat2,site %in% k$site)
ggplot(pdat2,aes(gene1,gene2))+
geom_point()+
geom_smooth(method=lm)+
geom_rug()+
theme_minimal()+
labs(x = g,y = g2)+
facet_wrap(~site,scales = "free")
image.png
6.炫酷的泛癌相關(guān)性圖
橫縱坐標(biāo)分別是-log10pvalue和相關(guān)系數(shù)r
a <- cor.test(exp[g,],exp[g2,])
a$estimate
## cor
## 0.9251489
a$p.value
## [1] 0
b = pdat2 %>%
group_by(site) %>%
summarise(cor = cor.test(gene1,gene2)$estimate,
nlogp = -log10(cor.test(gene1,gene2)$p.value))
head(b)
## # A tibble: 6 × 3
## site cor nlogp
## <chr> <dbl> <dbl>
## 1 Adipose Tissue 0.640 59.9
## 2 Adrenal Gland 0.972 80.6
## 3 Bladder 0.344 0.438
## 4 Blood 0.948 221\.
## 5 Blood Vessel 0.883 200\.
## 6 Bone Marrow 0.402 3.25
library(ggplot2)
options(scipen = 5)
# 橫坐標(biāo)起始位置不可以設(shè)為0伊脓,因?yàn)檫@是對(duì)數(shù)坐標(biāo)系,log(0)無意義碌冶。
# 統(tǒng)一設(shè)置0.01或者0.1這樣盅藻,會(huì)使一些p值太大的點(diǎn)無法呈現(xiàn)在圖上。因此群嗤,根據(jù)-logp最小值計(jì)算起始坐標(biāo)菠隆。
blm = 10^-str_count(min(b$nlogp),"0");blm
## [1] 0.1
bk = 10^((-str_count(min(b$nlogp),"0")):3);bk
## [1] 0.1 1.0 10.0 100.0 1000.0
ggplot(b,aes(nlogp,cor))+
geom_point(size=6,color="#bf77b6")+
scale_y_continuous(expand = c(0,0),limits = c(-1,1),breaks = seq(-1,1,0.2))+
scale_x_log10(expand = c(0,0),limits = c(blm, 1000),breaks = bk)+
geom_hline(yintercept = 0)+
geom_vline(xintercept = -log10(0.05))+
theme_bw()
image.png