MOVICS包整合多組學(xué)數(shù)據(jù)進(jìn)行分析退子,其安裝的過(guò)程太讓人心酸窒所,大家安裝的過(guò)程慢慢體會(huì)眉菱,下面就來(lái)學(xué)習(xí)下這個(gè)由“大魚(yú)海棠”老師開(kāi)發(fā)的R包吧。
1.示例數(shù)據(jù)
brca.tcga.RData
:MOVICS以TCGA乳腺癌的數(shù)據(jù)為例拐格,包含643例樣本的4種完整的組學(xué)信息,作為訓(xùn)練集率拒。brca.yau.RData
: 來(lái)自BRCA-YAU隊(duì)列,包含682例樣本的信息禁荒。
library(MOVICS)
load(system.file("extdata","brca.tcga.RData",package = "MOVICS",mustWork = T))
load(system.file("extdata","brca.yau.RData",package = "MOVICS",mustWork = T))
因多組學(xué)聚類(lèi)過(guò)程中相對(duì)耗費(fèi)時(shí)間,因此預(yù)處理的時(shí)候提取mRNAs, lncRNA的前500角撞,promoter CGI的前1000 高變探針或基因呛伴,前30個(gè)在至少3%的隊(duì)列中變化的基因。
2.工作流程
2.1 流程概述
MOVICS工作流程分為3個(gè)部分:
GET module: get subtypes
COMP module: compare subtypes
RUN module: run marker identification and verify subtypes
2.2 詳細(xì)步驟
2.2.1 分型(GET Module)
names(brca.tcga)
[1] "mRNA.expr" "lncRNA.expr" "meth.beta" "mut.status" "count"
[6] "fpkm" "maf" "segment" "clin.info"
names(brca.yau)
[1] "mRNA.expr" "clin.info"
# 提取多組學(xué)數(shù)據(jù)
mo.data <- brca.tcga[1:4]
count <- brca.tcga$count
fpkm <- brca.tcga$fpkm
maf <- brca.tcga$maf
segment <- brca.tcga$segment #CNV數(shù)據(jù)
surv.info <- brca.tcga$clin.info
輸入數(shù)據(jù)過(guò)濾
核心函數(shù):getElites()
→提供5種方法谒所,包括mad, sd, cox, freq和na.action热康。
tmp <- brca.tcga$mRNA.expr
dim(tmp)
[1] 500 643
#假設(shè)存在NA的值
tmp[1,1] <- tmp[2,2] <- NA
tmp[1:3,1:3]
BRCA-A03L-01A BRCA-A04R-01A BRCA-A075-01A
SCGB2A2 NA 1.42 7.24
SCGB1D2 10.11 NA 5.88
PIP 4.54 2.59 4.35
elite.tmp <- getElites(dat=tmp,
method="mad",
na.action="rm",
elite.pct=1)
dim(elite.tmp$elite.dat) #將這兩個(gè)含有NA的值去掉了
[1] 498 643
elite.tmp <- getElites(dat=tmp,
method = "mad",
na.action = "impute",
elite.pct = 1)
dim(elite.tmp$elite.dat) # impute插補(bǔ)
[1] 500 643
#> 用mad/sd篩選
tmp <- brca.tcga$mRNA.expr
elite.tmp <- getElites(dat=tmp,
method = "mad",
elite.pct = 0.1)
dim(elite.tmp$elite.dat) #選取前10%
[1] 50 643
elite.tmp <- getElites(dat = tmp,
method = "sd",
elite.num = 100,
elite.pct = 0.1)
dim(elite.tmp$elite.dat)
[1] 100 643
# pca方法篩選
tmp <- brca.tcga$mRNA.expr # get expression data with 500 features
elite.tmp <- getElites(dat = tmp,
method = "pca",
pca.ratio = 0.95)
dim(elite.tmp$elite.dat)
[1] 204 643
# cox方法篩選
tmp <- brca.tcga$mRNA.expr
elite.tmp <- getElites(dat=tmp,
method = "cox",
surv.info = surv.info,
p.cutoff = 0.05,
elite.num = 100)
5% 10% 15% 20% 25% 30% 35% 40% 45% 50% 55% 60% 65% 70% 75% 80% 85% 90% 95% 100%
dim(elite.tmp$elite.dat)
[1] 125 643
table(elite.tmp$unicox.res$pvalue<0.05)
FALSE TRUE
375 125
tmp <- brca.tcga$mut.status
elite.tmp <- getElites(dat=tmp,
method = "cox",
surv.info = surv.info,
p.cutoff = 0.05,
elite.num = 100)
7% 13% 20% 27% 33% 40% 47% 53% 60% 67% 73% 80% 87% 93% 100%
dim(elite.tmp$elite.dat)
[1] 3 643
table(elite.tmp$unicox.res$pvalue<0.05)
FALSE TRUE
27 3
#> 突變頻率篩選
tmp <- brca.tcga$mut.status
rowSums(tmp)
PIK3CA TP53 TTN CDH1 GATA3 MLL3 MUC16 MAP3K1 SYNE1 MUC12 DMD
208 186 111 83 58 49 48 38 33 32 31
NCOR1 FLG PTEN RYR2 USH2A SPTA1 MAP2K4 MUC5B NEB SPEN MACF1
31 30 29 27 27 25 25 24 24 23 23
RYR3 DST HUWE1 HMCN1 CSMD1 OBSCN APOB SYNE2
23 22 22 22 21 21 21 21
elite.tmp <- getElites(dat=tmp,
method = "freq",
elite.num = 80,
elite.pct = 0.1)
rowSums(elite.tmp$elite.dat)
PIK3CA TP53 TTN CDH1
208 186 111 83
elite.tmp <- getElites(dat = tmp,
method = "freq",
elite.pct = 0.2)
rowSums(elite.tmp$elite.dat)
PIK3CA TP53
208 186
獲取最佳分類(lèi)
optk.brca <- getClustNum(data = mo.data,
is.binary = c(F,F,F,T), #四種組學(xué)數(shù)據(jù)中只有somatic mutation是二分類(lèi)矩陣
try.N.clust = 2:8,
fig.name = "CLUSTER NUMBER OF TCGA-BRCA")
根據(jù)上圖直接選取N.clust=5作為最佳分類(lèi),然后用多種方法進(jìn)行聚類(lèi)劣领。
moic.res.list <- getMOIC(data=mo.data,
methodslist = list("SNF", "PINSPlus", "NEMO", "COCA", "LRAcluster", "ConsensusClustering", "IntNMF", "CIMLR", "MoCluster"),
N.clust = 5,
type = c("gaussian", "gaussian", "gaussian", "binomial"))
iClusterBayes.res <- getMOIC(data = mo.data,
N.clust = 5,
methodslist = "iClusterBayes", # specify only ONE algorithm here
type = c("gaussian","gaussian","gaussian","binomial"), # data type corresponding to the list
n.burnin = 1800,
n.draw = 1200,
prior.gamma = c(0.5, 0.5, 0.5, 0.5),
sdev = 0.05,
thin = 3)
moic.res.list <- append(moic.res.list,
list("iClusterBayes" = iClusterBayes.res))
save(moic.res.list,file = "moic.res.list.rda")
save(iClusterBayes.res,file = "iClusterBayes.res.rda")
獲取聚類(lèi)矩陣
load("moic.res.list.rda")
cmoic.brca <- getConsensusMOIC(moic.res.list = moic.res.list,
fig.name = "CONSENSUS HEATMAP",
distance = "euclidean",
linkage = "average")
silhoutte進(jìn)行相似性評(píng)價(jià)
getSilhouette(sil=cmoic.brca$sil,
fig.path = getwd(),
fig.name = "SILHOUETTE",
height = 5.5,
width = 5)
png
2
聚類(lèi)熱圖
indata <- mo.data
indata$meth.beta <- log2(indata$meth.beta/(1-indata$meth.beta))
plotdata <- getStdiz(data=indata,
halfwidth = c(2,2,2,NA),
centerFlag = c(T,T,T,F),
scaleFlag = c(T,T,T,F))
值得注意的是iClusterBayes.res$feat.res已經(jīng)按照每個(gè)組學(xué)數(shù)據(jù)特征的后延概率排序姐军,我們可以選取前10個(gè)feature,并生成一個(gè)feature list尖淘,命名為annRow奕锌,對(duì)熱圖進(jìn)行注釋。
load("iClusterBayes.res.rda")
feat <- iClusterBayes.res$feat.res
feat1 <- feat[which(feat$dataset == "mRNA.expr"),][1:10,"feature"]
feat2 <- feat[which(feat$dataset == "lncRNA.expr"),][1:10,"feature"]
feat3 <- feat[which(feat$dataset == "meth.beta"),][1:10,"feature"]
feat4 <- feat[which(feat$dataset == "mut.status"),][1:10,"feature"]
annRow <- list(feat1,feat2,feat3,feat4)
# if no color list specified all subheatmaps will be unified to green and red color pattern
mRNA.col <- c("#00FF00", "#008000", "#000000", "#800000", "#FF0000")
lncRNA.col <- c("#6699CC", "white" , "#FF3C38")
meth.col <- c("#0074FE", "#96EBF9", "#FEE900", "#F00003")
mut.col <- c("grey90" , "black")
col.list <- list(mRNA.col, lncRNA.col, meth.col, mut.col)
生成羨慕已久的MoHeatmap
getMoHeatmap(data = plotdata,
row.title = c("mRNA","lncRNA","Methylation","Mutation"),
is.binary = c(F,F,F,T), # the 4th data is mutation which is binary
legend.name = c("mRNA.FPKM","lncRNA.FPKM","M value","Mutated"),
clust.res = iClusterBayes.res$clust.res, # cluster results
clust.dend = NULL, # no dendrogram
show.rownames = c(F,F,F,F), # specify for each omics data
show.colnames = FALSE, # show no sample names
annRow = annRow, # mark selected features
color = col.list,
annCol = NULL, # no annotation for samples
annColors = NULL, # no annotation color
width = 10, # width of each subheatmap
height = 5, # height of each subheatmap
fig.name = "COMPREHENSIVE HEATMAP OF ICLUSTERBAYES")
為熱圖添加列注釋信息
annCol<- surv.info[,c("PAM50", "pstage", "age"), drop = FALSE]
annColors <- list(age = circlize::colorRamp2(breaks = c(min(annCol$age),
median(annCol$age),
max(annCol$age)),
colors = c("#0000AA", "#555555", "#AAAA00")),
PAM50 = c("Basal" = "blue",
"Her2" = "red",
"LumA" = "yellow",
"LumB" = "green",
"Normal" = "black"),
pstage = c("T1" = "green",
"T2" = "blue",
"T3" = "red",
"T4" = "yellow",
"TX" = "black"))
getMoHeatmap(data = plotdata,
row.title = c("mRNA","lncRNA","Methylation","Mutation"),
is.binary = c(F,F,F,T), # the 4th data is mutation which is binary
legend.name = c("mRNA.FPKM","lncRNA.FPKM","M value","Mutated"),
clust.res = cmoic.brca$clust.res, # consensusMOIC results
clust.dend = NULL, # show no dendrogram for samples
show.rownames = c(F,F,F,F), # specify for each omics data
show.colnames = FALSE, # show no sample names
show.row.dend = c(F,F,F,F), # show no dendrogram for features
annRow = NULL, # no selected features
color = col.list,
annCol = annCol, # annotation for samples
annColors = annColors, # annotation color
width = 10, # width of each subheatmap
height = 5, # height of each subheatmap
fig.name = "COMPREHENSIVE HEATMAP OF CONSENSUSMOIC")
2.2.2 亞型比較(COMP Module)
在確定亞型后村生,往往需要的對(duì)亞型的特征進(jìn)行描述惊暴,因此COMICS提供了常見(jiàn)的亞型特征分析。
(1)預(yù)后分析
surv.brca <- compSurv(moic.res = cmoic.brca,
surv.info = surv.info,
convt.time = "m",
surv.median.line = "h",
xyrs.est = c(5,10),
fig.name = "KAPLAN-MEIER CURVE OF CONSENSUSMOIC")
(2)基線資料
clin.brca <- compClinvar(moic.res = cmoic.brca,
var2comp = surv.info,
strata = "Subtype",
factorVars = c("PAM50","pstage","fustat"),
nonnormalVars = "futime",
exactVars="pstage",
doWord=T,
tab.name="SUMMARIZATION OF CLINICAL FEATURES")
print(clin.brca$compTab)
level CS1 CS2
1 n 170 52
2 fustat (%) 0 144 (84.7) 39 (75.0)
3 1 26 (15.3) 13 (25.0)
4 futime (median [IQR]) 747.50 [461.50, 1392.50] 700.50 [414.75, 1281.25]
5 PAM50 (%) Basal 1 ( 0.6) 2 ( 3.8)
6 Her2 4 ( 2.4) 34 (65.4)
7 LumA 81 (47.6) 2 ( 3.8)
8 LumB 84 (49.4) 9 (17.3)
CS3 CS4 CS5
1 173 137 111
2 163 (94.2) 126 (92.0) 95 (85.6)
3 10 ( 5.8) 11 ( 8.0) 16 (14.4)
4 867.00 [473.00, 1550.00] 680.50 [407.25, 1205.25] 785.00 [479.00, 1628.00]
5 0 ( 0.0) 0 ( 0.0) 108 (97.3)
6 0 ( 0.0) 0 ( 0.0) 0 ( 0.0)
7 154 (89.0) 107 (78.1) 0 ( 0.0)
8 1 ( 0.6) 30 (21.9) 0 ( 0.0)
p test
1
2 0.001
3
4 0.321 nonnorm
5 <0.001
6
7
8
[ reached 'max' / getOption("max.print") -- omitted 7 rows ]
(3)突變頻率
mut.brca <- compMut(moic.res = cmoic.brca,
mut.matrix = brca.tcga$mut.status, # binary somatic mutation matrix
doWord = TRUE, # generate table in .docx format
doPlot = TRUE, # draw OncoPrint
freq.cutoff = 0.05, # keep those genes that mutated in at least 5% of samples
p.adj.cutoff = 0.05, # keep those genes with adjusted p value < 0.05 to draw OncoPrint
innerclust = TRUE, # perform clustering within each subtype
annCol = annCol, # same annotation for heatmap
annColors = annColors, # same annotation color for heatmap
width = 6,
height = 2,
fig.name = "ONCOPRINT FOR SIGNIFICANT MUTATIONS",
tab.name = "INDEPENDENT TEST BETWEEN SUBTYPE AND MUTATION")
print(mut.brca)
Gene (Mutated) TMB CS1 CS2 CS3 CS4
1 PIK3CA 208 (32%) 44 (25.9%) 17 (32.7%) 80 (46.2%) 65 (47.4%)
2 TP53 186 (29%) 46 (27.1%) 39 (75.0%) 11 ( 6.4%) 8 ( 5.8%)
3 TTN 111 (17%) 31 (18.2%) 20 (38.5%) 25 (14.5%) 15 (10.9%)
4 CDH1 83 (13%) 7 ( 4.1%) 3 ( 5.8%) 57 (32.9%) 15 (10.9%)
5 GATA3 58 ( 9%) 19 (11.2%) 1 ( 1.9%) 16 ( 9.2%) 22 (16.1%)
6 MLL3 49 ( 8%) 14 ( 8.2%) 5 ( 9.6%) 11 ( 6.4%) 14 (10.2%)
7 MUC16 48 ( 8%) 10 ( 5.9%) 9 (17.3%) 14 ( 8.1%) 7 ( 5.1%)
8 MAP3K1 38 ( 6%) 7 (4.1%) 1 (1.9%) 15 (8.7%) 13 (9.5%)
CS5 pvalue padj
1 2 ( 1.8%) 1.30e-20 5.85e-20
2 82 (73.9%) 2.26e-52 2.03e-51
3 20 (18.0%) 8.45e-04 1.52e-03
4 1 ( 0.9%) 4.67e-18 1.40e-17
5 0 ( 0.0%) 5.59e-06 1.26e-05
6 5 ( 4.5%) 4.39e-01 4.39e-01
7 8 ( 7.2%) 9.43e-02 1.06e-01
8 2 (1.8%) 2.20e-02 3.30e-02
[ reached 'max' / getOption("max.print") -- omitted 1 rows ]
(4)TMB
tmb.brca <- compTMB(moic.res = cmoic.brca,
maf = maf,
rmDup = T,
rmFLAGS = F,
exome.size = 38,
test.method = "nonparametric",
fig.name = "DISTRIBUTION OF TMB AND TITV")
-Validating
-Silent variants: 24329
-Summarizing
--Possible FLAGS among top ten genes:
TTN
MUC16
-Processing clinical data
--Missing clinical data
-Finished in 3.340s elapsed (3.170s cpu)
Kruskal-Wallis rank sum test p value = 1.74e-23
post-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:
CS1 CS2 CS3 CS4
CS2 "2.01e-03" " NA" " NA" " NA"
CS3 "1.97e-06" "8.42e-09" " NA" " NA"
CS4 "3.61e-08" "3.61e-11" "8.41e-01" " NA"
CS5 "9.20e-05" "8.81e-01" "1.54e-12" "1.03e-15"
head(tmb.brca$TMB.dat)
samID variants TMB log10TMB Subtype
570 BRCA-A1EW-01A 9 0.2368421 0.09231426 CS1
564 BRCA-A0XT-01A 10 0.2631579 0.10145764 CS1
548 BRCA-A1ES-01A 13 0.3421053 0.12778658 CS1
541 BRCA-A1NG-01A 14 0.3684211 0.13621975 CS1
546 BRCA-A273-01A 14 0.3684211 0.13621975 CS1
537 BRCA-A1KP-01A 15 0.3947368 0.14449227 CS1
(5)FGA
FGA: fraction genome altered
colnames(segment) <- c("sample","chrom","start","end","value")
head(segment)
sample chrom start end value
1 BRCA-A090-01A 1 3301765 54730235 -0.1271
2 BRCA-A090-01A 1 54730247 57443819 -0.0899
3 BRCA-A090-01A 1 57448465 57448876 -1.1956
4 BRCA-A090-01A 1 57448951 64426102 -0.1009
5 BRCA-A090-01A 1 64426648 106657734 -0.1252
6 BRCA-A090-01A 1 106657854 106835667 0.1371
fga.brca <- compFGA(moic.res = cmoic.brca,
segment = segment,
iscopynumber = F, #this is a segmented copy number file
cnathreshold = 0.2,
test.method = "nonparametric",
fig.name = "BARPLOT OF FGA")
5% 10% 15% 21% 26% 31% 36% 41% 46% 51% 57% 62% 67% 72% 77% 82% 88% 93% 98%
head(fga.brca$summary)
samID FGA FGG FGL Subtype
1 BRCA-A03L-01A 0.6217991 0.30867268 0.31312638 CS1
2 BRCA-A04R-01A 0.2531019 0.09132014 0.16178176 CS1
3 BRCA-A075-01A 0.7007067 0.41444237 0.28626433 CS2
4 BRCA-A08O-01A 0.6501287 0.45648145 0.19364725 CS1
5 BRCA-A0A6-01A 0.1468893 0.06356488 0.08332444 CS3
6 BRCA-A0AD-01A 0.1722214 0.03864521 0.13357618 CS4
(6)藥物敏感性
drug.brca <- compDrugsen(moic.res = cmoic.brca,
norm.expr = fpkm[,cmoic.brca$clust.res$samID],
drugs = c("Cisplatin", "Paclitaxel"),
tissueType = "breast",
test.method = "nonparametric",
prefix = "BOXVIOLIN OF ESTIMATED IC50")
Cisplatin: Kruskal-Wallis rank sum test p value = 6.61e-61
post-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:
CS1 CS2 CS3 CS4
CS2 "2.18e-19" " NA" " NA" " NA"
CS3 "1.07e-18" "6.45e-08" " NA" " NA"
CS4 "6.93e-01" "2.18e-19" "5.76e-17" " NA"
CS5 "2.92e-33" "1.50e-01" "2.74e-16" "4.45e-32"
Paclitaxel: Kruskal-Wallis rank sum test p value = 3.59e-17
post-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:
CS1 CS2 CS3 CS4
CS2 "2.54e-11" " NA" " NA" " NA"
CS3 "5.42e-04" "3.65e-15" " NA" " NA"
CS4 "7.08e-03" "3.18e-14" "6.85e-01" " NA"
CS5 "7.61e-01" "2.06e-09" "8.87e-03" "3.29e-02"
(7)與已有亞型對(duì)比
surv.info$pstage <- factor(surv.info$pstage,levels = c("TX","T1","T2","T3","T4"))
agree.brca <- compAgree(moic.res = cmoic.brca,
subt2comp = surv.info[,c("PAM50","pstage")],
doPlot = T,
box.width = 0.2,
fig.name = "AGREEMENT OF CONSENSUSMOIC WITH PAM50 AND PSTAGE")
2.2.3 運(yùn)行模塊(RUN module)
前面已進(jìn)行亞型分析趁桃,并確定各個(gè)亞型的特征辽话,下面來(lái)確定各個(gè)亞型潛在的預(yù)測(cè)標(biāo)志物和相應(yīng)通路。
(1)差異分析
dea.method可以選擇“edger”,“edseq2”和“l(fā)imma”方法卫病,當(dāng)請(qǐng)注意deger和edseq2需要count數(shù)據(jù)油啤,而 limma需要log2處理的TPM/FPKM數(shù)據(jù)。
# DEA with edgeR
runDEA(dea.method = "edger",
expr = count,
moic.res = cmoic.brca,
prefix = "TCGA-BRCA")
(2)Biomarker
默認(rèn)選取每個(gè)亞型的前200個(gè)差異基因(注意:次差異基因不與其他部分的特征基因重合)蟀苛。
marker.up <- runMarker(moic.res = cmoic.brca,
dea.method = "edger", # name of DEA method
prefix = "TCGA-BRCA", # MUST be the same of argument in runDEA()
dat.path = getwd(), # path of DEA files
res.path = getwd(), # path to save marker files
p.cutoff = 0.05, # p cutoff to identify significant DEGs
p.adj.cutoff = 0.05, # padj cutoff to identify significant DEGs
dirct = "up", # direction of dysregulation in expression
n.marker = 100, # number of biomarkers for each subtype
doplot = TRUE, # generate diagonal heatmap
norm.expr = fpkm, # use normalized expression as heatmap input
annCol = annCol, # sample annotation in heatmap
annColors = annColors, # colors for sample annotation
show_rownames = FALSE, # show no rownames (biomarker name)
fig.name = "UPREGULATED BIOMARKER HEATMAP")
head(marker.up$templates)
probe class dirct
1 CACNG6 CS1 up
2 VSTM2A CS1 up
3 SPDYC CS1 up
4 ARHGAP36 CS1 up
5 CST9 CS1 up
6 ASCL1 CS1 up
(3)GSEA分析
MSIGDB.FILE <- system.file("extdata", "c5.bp.v7.1.symbols.xls", package = "MOVICS", mustWork = TRUE)
gsea.up <- runGSEA(moic.res = cmoic.brca,
dea.method = "edger", # name of DEA method
prefix = "TCGA-BRCA", # MUST be the same of argument in runDEA()
dat.path = getwd(), # path of DEA files
res.path = getwd(), # path to save GSEA files
msigdb.path = MSIGDB.FILE, # MUST be the ABSOLUTE path of msigdb file
norm.expr = fpkm, # use normalized expression to calculate enrichment score
dirct = "up", # direction of dysregulation in pathway
p.cutoff = 0.05, # p cutoff to identify significant pathways
p.adj.cutoff = 0.25, # padj cutoff to identify significant pathways
gsva.method = "gsva", # method to calculate single sample enrichment score
norm.method = "mean", # normalization method to calculate subtype-specific enrichment score
fig.name = "UPREGULATED PATHWAY HEATMAP")
Estimating GSEA scores for 50 gene sets.
Estimating ECDFs with Gaussian kernels
(4)GSVA分析
GSET.FILE <-
system.file("extdata", "gene sets of interest.gmt", package = "MOVICS", mustWork = TRUE)
gsva.res <-
runGSVA(moic.res = cmoic.brca,
norm.expr = fpkm,
gset.gmt.path = GSET.FILE, # ABSOLUTE path of gene set file
gsva.method = "gsva", # method to calculate single sample enrichment score
annCol = annCol,
annColors = annColors,
fig.path = getwd(),
fig.name = "GENE SETS OF INTEREST HEATMAP",
height = 5,
width = 8)
Estimating GSVA scores for 21 gene sets.
Estimating ECDFs with Gaussian kernels
print(gsva.res$raw.es[1:3,1:3])
BRCA-A03L-01A BRCA-A04R-01A BRCA-A075-01A
Adhesion 0.09351280 -0.2125523 0.10362446
Antigen_Processing 0.05451682 -0.3909617 0.05508321
B-Cell_Functions -0.01931423 -0.6120864 0.18301567
print(gsva.res$scaled.es[1:3,1:3])
BRCA-A03L-01A BRCA-A04R-01A BRCA-A075-01A
Adhesion 0.28466782 -0.6600776 0.3158800
Antigen_Processing 0.12510682 -1.0000000 0.1267097
B-Cell_Functions -0.04056456 -1.0000000 0.4561039
(5)NTP驗(yàn)證外部數(shù)據(jù)集
yau.ntp.pred <- runNTP(expr = brca.yau$mRNA.expr,
templates = marker.up$templates,
scaleFlag = T,
centerFlag = T,
doPlot = T,
fig.name = "NTP HEATMAP FOR YAU")
CS1 CS2 CS3 CS4 CS5 <NA>
114 84 116 92 141 135
head(yau.ntp.pred$ntp.res)
prediction d.CS1 d.CS2 d.CS3 d.CS4 d.CS5 p.value FDR
107 CS4 0.7102 0.7160 0.7407 0.5794 0.7477 0.0010 0.0019
109 CS2 0.7486 0.5743 0.7140 0.7579 0.7484 0.0010 0.0019
11 CS1 0.6832 0.6998 0.7424 0.7186 0.7678 0.1399 0.1564
110 CS1 0.5051 0.7696 0.7521 0.7140 0.7529 0.0010 0.0019
111 CS1 0.6704 0.6884 0.7781 0.7115 0.7891 0.0060 0.0092
113 CS2 0.7386 0.6364 0.6533 0.7302 0.7271 0.0180 0.0243
驗(yàn)證集的預(yù)后分析
surv.yau <- compSurv(moic.res = yau.ntp.pred,
surv.info = brca.yau$clin.info,
convt.time = "m",
surv.median.line = "hv",
fig.name = "KAPLAN-MEIER CURVE OF NTP FOR YAU")
檢測(cè)下與PAM50分型是否一致
agree.yau <- compAgree(moic.res = yau.ntp.pred,
subt2comp = brca.yau$clin.info[,"PAM50",drop=F],
doPlot = T,
fig.name = "YAU PREDICTEDMOIC WITH PAM50")
print(agree.yau)
current.subtype other.subtype RI AMI JI FM
1 Subtype PAM50 0.7819319 0.369929 0.3286758 0.4959205
Kappa一致性評(píng)價(jià)
yau.pam.pred <- runPAM(train.expr = fpkm,
moic.res = cmoic.brca,
test.expr = brca.yau$mRNA.expr)
tcga.ntp.pred <- runNTP(expr = fpkm,
templates = marker.up$templates,
doPlot = FALSE)
CS1 CS2 CS3 CS4 CS5 <NA>
139 79 162 105 108 50
tcga.pam.pred <- runPAM(train.expr = fpkm,
moic.res = cmoic.brca,
test.expr = fpkm)
runKappa(subt1 = cmoic.brca$clust.res$clust,
subt2 = tcga.ntp.pred$clust.res$clust,
subt1.lab = "CMOIC",
subt2.lab = "NTP",
fig.name = "CONSISTENCY HEATMAP FOR TCGA between CMOIC and NTP")
runKappa(subt1 = cmoic.brca$clust.res$clust,
subt2 = tcga.pam.pred$clust.res$clust,
subt1.lab = "CMOIC",
subt2.lab = "PAM",
fig.name = "CONSISTENCY HEATMAP FOR TCGA between CMOIC and PAM")
runKappa(subt1 = yau.ntp.pred$clust.res$clust,
subt2 = yau.pam.pred$clust.res$clust,
subt1.lab = "NTP",
subt2.lab = "PAM",
fig.name = "CONSISTENCY HEATMAP FOR YAU")
3.彩蛋
通過(guò)整套流程益咬,你會(huì)發(fā)現(xiàn)moic.res
幾乎貫穿整個(gè)下游分析,甚至可以說(shuō)如果無(wú)法通過(guò)GET module獲取合適的moic.res
對(duì)象屹逛,下游分析就涼涼啦础废。事實(shí)上汛骂,moic.res
的核心信息是clust.res
。 clust.res
是一個(gè)data.frame评腺,有samID列與其他數(shù)據(jù)匹配即可帘瞭。另一個(gè)參數(shù)是mo.method
,作為輸出數(shù)據(jù)的前綴蒿讥。 例如蝶念,需要檢測(cè)下PAM50分型的預(yù)測(cè)價(jià)值,僅僅需要設(shè)置pseudo.moic.res
對(duì)象和自定義的mo.method
即可芋绸。這幾乎為任何亞型分析媒殉,打開(kāi)了洪荒之力的大門(mén)。驚嘆Kち病M⑷亍!
pseudo.moic.res <- list("clust.res"=surv.info,
"mo.method"="PAM50")
pseudo.moic.res$clust.res$samID <- rownames(pseudo.moic.res$clust.res)
pseudo.moic.res$clust.res$clust <- sapply(pseudo.moic.res$clust.res$PAM50,
switch,
"Basal" = 1, # relabel Basal as 1
"Her2" = 2, # relabel Her2 as 2
"LumA" = 3, # relabel LumA as 3
"LumB" = 4, # relabel LumnB as 4
"Normal" = 5) # relabel Normal as 5
head(pseudo.moic.res$clust.res)
fustat futime PAM50 pstage age samID clust
BRCA-A03L-01A 0 2442 LumA T3 34 BRCA-A03L-01A 3
BRCA-A04R-01A 0 2499 LumB T1 36 BRCA-A04R-01A 4
BRCA-A075-01A 0 518 LumB T2 42 BRCA-A075-01A 4
BRCA-A08O-01A 0 943 LumA T2 45 BRCA-A08O-01A 3
BRCA-A0A6-01A 0 640 LumA T2 64 BRCA-A0A6-01A 3
BRCA-A0AD-01A 0 1157 LumA T1 83 BRCA-A0AD-01A 3
pam50.brca <- compSurv(moic.res = pseudo.moic.res,
surv.info = surv.info,
convt.time = "y", # convert day unit to year
surv.median.line = "h", # draw horizontal line at median survival
fig.name = "KAPLAN-MEIER CURVE OF PAM50 BY PSEUDO")
Secssion Information
sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)
Matrix products: default
locale:
[1] LC_COLLATE=Chinese (Simplified)_China.936
[2] LC_CTYPE=Chinese (Simplified)_China.936
[3] LC_MONETARY=Chinese (Simplified)_China.936
[4] LC_NUMERIC=C
[5] LC_TIME=Chinese (Simplified)_China.936
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] MOVICS_0.99.16 Biobase_2.52.0 BiocGenerics_0.38.0
[4] rmdformats_1.0.2 knitr_1.33 pacman_0.5.1
[7] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.6
[10] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3
[13] tibble_3.1.2 ggplot2_3.3.3 tidyverse_1.3.1
loaded via a namespace (and not attached):
[1] rsvd_1.0.5 corpcor_1.6.9
[3] aricode_1.0.0 class_7.3-19
[5] foreach_1.5.1 crayon_1.4.1
[7] MASS_7.3-54 rhdf5filters_1.4.0
[9] nlme_3.1-152 backports_1.2.1
[11] reprex_2.0.0 sva_3.40.0
[13] impute_1.66.0 GOSemSim_2.18.0
[15] rlang_0.4.11 svd_0.5
[17] XVector_0.32.0 readxl_1.3.1
[19] heatmap.plus_1.3 irlba_2.3.3
[21] nloptr_1.2.2.2 limma_3.48.0
[23] BiocParallel_1.26.0 rjson_0.2.20
[25] bit64_4.0.5 glue_1.4.2
[27] rngtools_1.5 SNFtool_2.2
[29] AnnotationDbi_1.54.0 oompaData_3.1.1
[31] DOSE_3.18.0 haven_2.4.1
[33] tidyselect_1.1.1 SummarizedExperiment_1.22.0
[35] km.ci_0.5-2 rio_0.5.26
[37] XML_3.99-0.6 zoo_1.8-9
[39] ggpubr_0.4.0 ridge_2.9
[41] maftools_2.8.0 xtable_1.8-4
[43] magrittr_2.0.1 evaluate_0.14
[45] cli_2.5.0 zlibbioc_1.38.0
[47] rstudioapi_0.13 fastmatch_1.1-0
[49] ClassDiscovery_3.3.13 InterSIM_2.2.0
[51] treeio_1.16.1 GSVA_1.40.0
[53] BiocSingular_1.8.0 xfun_0.23
[55] coca_1.1.0 clue_0.3-59
[57] cluster_2.1.2 caTools_1.18.2
[59] tidygraph_1.2.0 ggtext_0.1.1
[61] KEGGREST_1.32.0 flexclust_1.4-0
[63] ggrepel_0.9.1 ape_5.5
[65] permute_0.9-5 Biostrings_2.60.0
[67] png_0.1-7 withr_2.4.2
[69] bitops_1.0-7 ggforce_0.3.3
[71] plyr_1.8.6 cellranger_1.1.0
[73] GSEABase_1.54.0 e1071_1.7-7
[75] survey_4.0
[ reached getOption("max.print") -- omitted 167 entries ]
參考資料:
MOVICS:Multi-Omics integration and VIsualization in Cancer Subtyping(MOVICS VIGNETTE)
Lu, X., Meng, J., Zhou, Y., Jiang, L., and Yan, F. (2020). MOVICS: an R package for multi-omics integration and visualization in cancer subtyping. bioRxiv, 2020.2009.2015.297820. [doi.org/10.1101/2020.09.15.297820]