數(shù)據(jù)分析：基于GSVA的通路富集分析

與傳統(tǒng)的Over Representation Analysis: the up- or down-regulated DEGs和Gene Set Enrichment Analysis: All the ranked genes相比晌区，Gene Set Variation Analysis分析直接使用gene expression或RNA-seq profile matrix計算基因在通路中的得分诅诱，并且這種計算可以基于單樣本處理。

GSVA方法圖解

輸入表達譜數(shù)據(jù)和基因集數(shù)據(jù)霍转。計算其得分刨啸。

The GSVA package implements a non-parametric unsupervised method, called Gene Set Variation
Analysis (GSVA), for assessing gene set enrichment (GSE) in gene expression microarray and RNA-
seq data. In contrast to most GSE methods, GSVA performs a change in coordinate systems,
transforming the data from a gene by sample matrix to a gene set by sample matrix. Thereby
allowing for the evaluation of pathway enrichment for each sample. This transformation is done
without the use of a phenotype, thus facilitating very powerful and open-ended analyses in a now
pathway centric manner.

加載R包

knitr::opts_chunk$set(warning = F, message = F)
library(dplyr)
library(tibble)
library(ggplot2)
library(data.table)
library(GSVA)

rm(list = ls())
options(stringsAsFactors = F)
options(future.globals.maxSize = 1000 * 1024^2)

grp <- c("S1", "S2")
grp.col <- c("#6C326C", "#77A2D1")

load data

gene Expression DESeq2 object

phen <- fread("../../Result/phenotype/phenotype_cluster.csv")
geneExp <- fread("../../Result/profile/geneExp_filter.tsv")
# 處理數(shù)據(jù)成整型
geneExp <- geneExp %>% column_to_rownames("V1") %>% as.matrix()
geneExp <- round(geneExp) %>% data.frame() %>% rownames_to_column("V1")

pathways_hallmark_kegg <- fgsea::gmtPathways("../../Result/GeneID/msigdb.v7.1.symbols_KEGG.gmt")
pathways_hallmark_GO <- fgsea::gmtPathways("../../Result/GeneID/msigdb.v7.1.symbols_GO.gmt")

Curation Function

get_ExprSet <- function(x=phen, 
                        y=geneExp){
  # x=phen
  # y=geneExp

  sid <- intersect(x$Barcode, colnames(y))
  # phenotype
  phe <- x %>% filter(Barcode%in%sid) %>%
    mutate(Cluster=factor(as.character(Cluster))) %>%
    column_to_rownames("Barcode") 
  
  # profile 
  prf <- y %>% column_to_rownames("V1") %>%
    dplyr::select(all_of(sid))
  
  # determine the right order between profile and phenotype 
  for(i in 1:ncol(prf)){ 
    if (!(colnames(prf)[i] == rownames(phe)[i])) {
      stop(paste0(i, " Wrong"))
    }
  }
  require(convert)
  exprs <- as.matrix(prf)
  adf <-  new("AnnotatedDataFrame", data=phe)
  experimentData <- new("MIAME",
        name="Hua Zou", lab="Hua Lab",
        contact="zouhua1@outlook.com",
        title="KRIC Experiment",
        abstract="The ExpressionSet",
        url="www.zouhua.top",
        other=list(notes="Created from text files"))
  expressionSet <- new("ExpressionSet", exprs=exprs,
                       phenoData=adf, 
                       experimentData=experimentData)
  
  return(expressionSet)
}

get_score <- function(dataset=get_ExprSet(x=phen, y=geneExp),
                      geneset=pathways_hallmark_kegg,
                      methods="ssgsea"){
  
  # dataset=get_ExprSet(x=phen, y=geneExp)
  # geneset=pathways_hallmark_kegg
  # methods="ssgsea"  
  
  dat_fit <- gsva(expr = dataset, 
              gset.idx.list = geneset,
              method = methods, 
              min.sz = 5, 
              max.sz = 500,
              kcdf = "Poisson")
  res <- exprs(dat_fit) %>% t() %>% 
    data.frame() %>%
    rownames_to_column("SampleID")
  
  return(res)
}

# Differential methylation/copyNumber/methylation
get_limma <- function(dataset=methylation_set,
                      group_col=grp,
                      tag="methylation",
                      fc=1,
                      pval=0.05){
  
  # dataset=methylation_set
  # group_col=grp
  # tag="methylation"  
  # fc=1
  # pval=0.05
  
  pheno <- pData(dataset) 
  
  if(tag == "methylation"){
    # transform the beta value into M values via lumi package
    require(lumi)
    edata <- beta2m(exprs(dataset))    
  }else{
    edata <- exprs(dataset)
  }

  
  require(limma)
  design <- model.matrix(~0 + pheno$Cluster)
  rownames(design) <- rownames(pheno)
  colnames(design) <- group_col
  exprSet <- edata  
  
  # show distribution
  boxplot(exprSet)
  plotDensities(exprSet) 
  
  # linear fitting 
  fit <- lmFit(exprSet, design, method = 'ls')
  contrast <- makeContrasts("S1-S2", levels = design) 
  
  # eBayes
  fit2 <- contrasts.fit(fit, contrast)
  fit2 <- eBayes(fit2)
  
  qqt(fit2$t, df = fit2$df.prior + fit2$df.residual, pch = 16, cex = 0.2)
  abline(0, 1)  

  # differential features
  diff_feature <- topTable(fit2, number = Inf, adjust.method = 'BH') %>%
    rownames_to_column("Feature") 
  
  # prof[rownames(prof)%in%"EVI2A", ] %>% data.frame() %>% setNames("EVI2A") %>%
  #   rownames_to_column("SampleID") %>%
  #   inner_join(pheno %>%rownames_to_column("SampleID"), by = "SampleID") -> a1
  # wilcox.test(EVI2A~Cluster, data = a1)
  # ggplot(a1, aes(x=Cluster, y=EVI2A))+geom_boxplot() 
  
  diff_feature[which(diff_feature$logFC >= fc & 
                       diff_feature$adj.P.Val < pval), "Enrichment"] <- group_col[1]
  diff_feature[which(diff_feature$logFC <= -fc & 
                       diff_feature$adj.P.Val < pval), "Enrichment"] <- group_col[2]
  diff_feature[which(abs(diff_feature$logFC) < fc |
                       diff_feature$adj.P.Val >= pval), "Enrichment"] <- "Nonsignif"
  
  diff_res <- diff_feature %>% 
    setNames(c("Feature", "log2FoldChange", "baseMean", "t", 
               "pvalue", "padj", "B", "Enrichment")) %>% 
    dplyr::select(Feature, everything()) %>%
    arrange(padj, log2FoldChange) %>%
    column_to_rownames("Feature")
  
  res <- list(fit=fit2, diff_res=diff_res)
  
  return(res)
}

gsva score

kegg_gsva <- get_score(dataset=get_ExprSet(x=phen, y=geneExp),
                       geneset=pathways_hallmark_kegg,
                       methods="gsva")

if(!dir.exists("../../Result/pathway/")){
  dir.create("../../Result/pathway/", recursive = T)
}
write.csv(kegg_gsva, "../../Result/pathway/KEGG_gsva.csv", row.names = F)

# Differential Test
kegg_gsva_exprset <-  get_ExprSet(x=phen, y=kegg_gsva %>% column_to_rownames("SampleID") %>% t() %>% data.frame() %>%rownames_to_column("V1"))  
Diff_gsva <- get_limma(dataset =kegg_gsva_exprset, tag = "gsva")
DT::datatable(Diff_gsva$diff_res)

網(wǎng)盤地址： https://pan.baidu.com/s/1AGsvOD6klfljvu0T2A-akA 玲献；提取碼：vw7z

version

sessionInfo()

R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    
system code page: 936

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] GSVA_1.36.3       data.table_1.13.6 ggplot2_3.3.3     tibble_3.0.3      dplyr_1.0.2      

loaded via a namespace (and not attached):
  [1] fgsea_1.14.0                colorspace_1.4-1            ellipsis_0.3.1             
  [4] ggridges_0.5.2              qvalue_2.20.0               XVector_0.28.0             
  [7] GenomicRanges_1.40.0        rstudioapi_0.11             farver_2.0.3               
 [10] urltools_1.7.3              graphlayouts_0.7.1          ggrepel_0.9.1.9999         
 [13] DT_0.16                     bit64_0.9-7                 AnnotationDbi_1.50.3       
 [16] scatterpie_0.1.5            xml2_1.3.2                  splines_4.0.2              
 [19] GOSemSim_2.14.2             knitr_1.30                  shinythemes_1.1.2          
 [22] polyclip_1.10-0             jsonlite_1.7.1              annotate_1.66.0            
 [25] GO.db_3.11.4                graph_1.66.0                ggforce_0.3.2              
 [28] shiny_1.5.0                 BiocManager_1.30.10         compiler_4.0.2             
 [31] httr_1.4.2                  rvcheck_0.1.8               Matrix_1.3-2               
 [34] fastmap_1.0.1               later_1.1.0.1               tweenr_1.0.1               
 [37] htmltools_0.5.0             prettyunits_1.1.1           tools_4.0.2                
 [40] igraph_1.2.5                gtable_0.3.0                glue_1.4.2                 
 [43] GenomeInfoDbData_1.2.3      reshape2_1.4.4              DO.db_2.9                  
 [46] fastmatch_1.1-0             Rcpp_1.0.5                  enrichplot_1.8.1           
 [49] Biobase_2.48.0              vctrs_0.3.4                 ggraph_2.0.3               
 [52] xfun_0.19                   stringr_1.4.0               mime_0.9                   
 [55] lifecycle_0.2.0             XML_3.99-0.5                DOSE_3.14.0                
 [58] europepmc_0.4               MASS_7.3-53                 zlibbioc_1.34.0            
 [61] scales_1.1.1                tidygraph_1.2.0             hms_0.5.3                  
 [64] promises_1.1.1              parallel_4.0.2              SummarizedExperiment_1.18.2
 [67] RColorBrewer_1.1-2          yaml_2.2.1                  memoise_1.1.0              
 [70] gridExtra_2.3               triebeard_0.3.0             stringi_1.5.3              
 [73] RSQLite_2.2.0               S4Vectors_0.26.1            BiocGenerics_0.34.0        
 [76] BiocParallel_1.22.0         GenomeInfoDb_1.24.2         rlang_0.4.7                
 [79] pkgconfig_2.0.3             bitops_1.0-6                matrixStats_0.57.0         
 [82] evaluate_0.14               lattice_0.20-41             purrr_0.3.4                
 [85] htmlwidgets_1.5.2           cowplot_1.1.1               bit_4.0.3                  
 [88] tidyselect_1.1.0            GSEABase_1.50.1             plyr_1.8.6                 
 [91] magrittr_1.5                R6_2.5.0                    IRanges_2.22.2             
 [94] generics_0.1.0              DelayedArray_0.14.1         DBI_1.1.0                  
 [97] pillar_1.4.6                withr_2.3.0                 RCurl_1.98-1.2             
[100] crayon_1.3.4                rmarkdown_2.5               viridis_0.5.1              
[103] progress_1.2.2              grid_4.0.2                  blob_1.2.1                 
[106] digest_0.6.25               xtable_1.8-4                tidyr_1.1.2                
[109] httpuv_1.5.4                gridGraphics_0.5-0          stats4_4.0.2               
[112] munsell_0.5.0               viridisLite_0.3.0           ggplotify_0.0.5

Reference

GSVA: gene set variation analysis for microarray and RNA-Seq data

最后編輯于：2024.06.08 09:30:03

?著作權歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者

禁止轉(zhuǎn)載廓啊，如需轉(zhuǎn)載請通過簡信或評論聯(lián)系作者羽圃。

人面猴
序言：七十年代末乾胶，一起剝皮案震驚了整個濱河市，隨后出現(xiàn)的幾起案子朽寞，更是在濱河造成了極大的恐慌识窿，老刑警劉巖，帶你破解...
沈念sama閱讀 216,744評論 6贊 502
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件脑融，死亡現(xiàn)場離奇詭異喻频，居然都是意外死亡，警方通過查閱死者的電腦和手機肘迎，發(fā)現(xiàn)死者居然都...
沈念sama閱讀 92,505評論 3贊 392
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進店門甥温，熙熙樓的掌柜王于貴愁眉苦臉地迎上來，“玉大人妓布，你說我怎么就攤上這事姻蚓。” “怎么了匣沼？”我有些...
開封第一講書人閱讀 163,105評論 0贊 353
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵狰挡，是天一觀的道長。經(jīng)常有香客問我释涛，道長加叁，這世上最難降的妖魔是什么？我笑而不...
開封第一講書人閱讀 58,242評論 1贊 292
?港島之戀（遺憾婚禮）
正文為了忘掉前任枢贿，我火速辦了婚禮殉农，結(jié)果婚禮上，老公的妹妹穿的比我還像新娘局荚。我一直安慰自己超凳，他們只是感情好，可當我...
茶點故事閱讀 67,269評論 6贊 389
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布耀态。她就那樣靜靜地躺著轮傍，像睡著了一般。火紅的嫁衣襯著肌膚如雪首装。梳的紋絲不亂的頭發(fā)上创夜，一...
開封第一講書人閱讀 51,215評論 1贊 299
城市分裂傳說
那天，我揣著相機與錄音仙逻，去河邊找鬼驰吓。笑死涧尿，一個胖子當著我的面吹牛，可吹牛的內(nèi)容都是我干的檬贰。我是一名探鬼主播姑廉，決...
沈念sama閱讀 40,096評論 3贊 418
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼，長吁一口氣：“原來是場噩夢啊……” “哼翁涤！你這毒婦竟也來了桥言？” 一聲冷哼從身側(cè)響起，我...
開封第一講書人閱讀 38,939評論 0贊 274
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤葵礼，失蹤者是張志新（化名）和其女友劉穎号阿，沒想到半個月后，有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體鸳粉，經(jīng)...
沈念sama閱讀 45,354評論 1贊 311
?護林員之死
正文獨居荒郊野嶺守林人離奇死亡扔涧，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內(nèi)容為張勛視角年9月15日...
茶點故事閱讀 37,573評論 2贊 333
?白月光啟示錄
正文我和宋清朗相戀三年，在試婚紗的時候發(fā)現(xiàn)自己被綠了赁严。大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片扰柠。...
茶點故事閱讀 39,745評論 1贊 348
活死人
序言：一個原本活蹦亂跳的男人離奇死亡，死狀恐怖疼约，靈堂內(nèi)的尸體忽然破棺而出，到底是詐尸還是另有隱情蝙泼，我是刑警寧澤程剥，帶...
沈念sama閱讀 35,448評論 5贊 344
?日本核電站爆炸內(nèi)幕
正文年R本政府宣布，位于F島的核電站汤踏，受9級特大地震影響织鲸，放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜溪胶，卻給世界環(huán)境...
茶點故事閱讀 41,048評論 3贊 327
男人毒藥：我在死后第九天來索命
文/蒙蒙一搂擦、第九天我趴在偏房一處隱蔽的房頂上張望。院中可真熱鬧哗脖，春花似錦瀑踢、人聲如沸。這莊子的主人今日做“春日...
開封第一講書人閱讀 31,683評論 0贊 22
一樁弒父案橱夭，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽。三九已至桑逝，卻和暖如春棘劣，著一層夾襖步出監(jiān)牢的瞬間，已是汗流浹背楞遏。一陣腳步聲響...
開封第一講書人閱讀 32,838評論 1贊 269
情欲美人皮
我被黑心中介騙來泰國打工茬暇，沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留首昔，地道東北人。一個月前我還...
沈念sama閱讀 47,776評論 2贊 369
代替公主和親
正文我出身青樓糙俗，卻偏偏與公主長得像沙廉，于是被迫代替她去往敵國和親。傳聞我的和親對象是個殘疾皇子臼节，可洞房花燭夜當晚...
茶點故事閱讀 44,652評論 2贊 354