xCell is a recently published method based on ssGSEA that estimates the abundance scores of 64 immune cell types, including adaptive and innate immune cells, hematopoietic progenitors, epithelial cells, and extracellular matrix cells
xcell 是基于ssGSEA(single-sample GSEA)
ssGSEA顧名思義是一種特殊的GSEA考阱,它主要針對(duì)單樣本無(wú)法做GSEA而提出的一種實(shí)現(xiàn)方法,原理上與GSEA是類似的谦去,不同的是GSEA需要準(zhǔn)備表達(dá)譜文件即gct截汪,根據(jù)表達(dá)譜文件計(jì)算每個(gè)基因的rank值
參考網(wǎng)址https://shengxin.ren/article/403和https://support.bioconductor.org/p/98463/
關(guān)于Xcell找對(duì)網(wǎng)址很重要功蜓,我一開(kāi)始找錯(cuò)了地方
https://github.com/dviraran/xCell
首先看read.me 很開(kāi)心是我要的東西
image.png
安裝這個(gè)之前經(jīng)常報(bào)錯(cuò)翘单,要安裝很多別的輔助包
install.packages('Rcpp')#########安裝各類程序包
devtools::install_github('dviraran/xCell')
image.png
安裝的時(shí)候還會(huì)有錯(cuò)誤。
安裝好的這一刻阳液,還是很開(kāi)心的繁疤。
image.png
使用方法
第一步 計(jì)算xCell
library(xCell)
exprMatrix = read.table(file = '/Users/chenyuqiao/Desktop/TCGA-LUAD.htseq_counts.tsv',header=TRUE,row.names=1, as.is=TRUE)
xCellAnalysis(exprMatrix)
data imput
library(xCell)
exprMatrix = read.table(file = '/Users/chenyuqiao/Desktop/TCGA-LUAD.htseq_counts.tsv',header=TRUE,row.names=1, as.is=TRUE)
###exprMatrix<- exprMatrix[1:10,1:10]
Ensemble_ID<- rownames(exprMatrix)
ID<- strsplit(Ensemble_ID, "[.]")
str(ID)
IDlast<- sapply(ID, "[", 1)
exprMatrix$Ensemble_ID<- IDlast
row.names(exprMatrix)<- exprMatrix$Ensemble_ID
save(exprMatrix, file = 'TCGA.Rdata')
load(file = 'TCGA.Rdata')
####library(clusterProfiler)
library(org.Hs.eg.db)
ls("package:org.Hs.eg.db")
g2s=toTable(org.Hs.egSYMBOL);head(g2s)
g2e=toTable(org.Hs.egENSEMBL);head(g2e)
tmp=merge(g2e,g2s,by='gene_id')
head(tmp)
colnames(exprMatrix)[ncol(exprMatrix)] <- c("ensembl_id")###################重命名Ensemble_ID 便于后面merge
exprMatrix[1:4,1:4]
exprMatrix<- merge(tmp,exprMatrix,by='ensembl_id')
exprMatrix[1:4,1:4]
exprMatrix<- exprMatrix[,- c(1,2)]
exprMatrix=exprMatrix[!duplicated(exprMatrix$symbol),]
row.names(exprMatrix)<- exprMatrix[,1]
exprMatrix<- exprMatrix[,-1]
exprMatrix[1:4,1:4]
xCellAnalysis(exprMatrix)####################一句話就分析完成了
##save(results,file = 'Xcell_result.Rdata')#############需要重新修改
第二步:批量生存分析
load(file = 'Xcell_result.Rdata')
result<- as.data.frame(result)
library(dplyr)
library(tidyverse)
TCGA.LUAD.GDC_phenotype <- read.delim("TCGA-LUAD.GDC_phenotype.tsv")
#colnames(TCGA.LUAD.GDC_phenotype)
#head(TCGA.LUAD.GDC_phenotype)
LUAD_Pheno<- select(TCGA.LUAD.GDC_phenotype, "submitter_id.samples", "vital_status.diagnoses", "days_to_death.diagnoses", "days_to_last_follow_up.diagnoses", "pathologic_N", "pathologic_M", "days_to_new_tumor_event_after_initial_treatment")
LUAD_Pheno<- LUAD_Pheno[grep('01A',LUAD_Pheno$submitter_id.samples),] #####只篩選01A的 01A代表腫瘤
LUAD_Pheno[is.na(LUAD_Pheno)]<- 0
LUAD_Pheno$PFS_status<- ifelse((LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment == 0 & LUAD_Pheno$days_to_death.diagnoses == 0), 0,1)
##################################
LUAD_Pheno$OS<- ifelse(LUAD_Pheno$days_to_last_follow_up.diagnoses > LUAD_Pheno$days_to_death.diagnoses, LUAD_Pheno$days_to_last_follow_up.diagnoses,LUAD_Pheno$days_to_death.diagnoses)
LUAD_Pheno$PFS<- ifelse(LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment == 0, LUAD_Pheno$OS ,LUAD_Pheno$days_to_new_tumor_event_after_initial_treatment)
LUAD_Pheno$OS_status<- as.factor(LUAD_Pheno$vital_status.diagnoses)
#############################設(shè)計(jì)好分組
#############################生存曲線
firstdata<- result ###############expre
firstdata$ID<- rownames(firstdata)
gene<- row.names(firstdata)
#######select only gene to analysis
library(survminer)
library(survival)
library(ggplot2)
library(dplyr)
for (x in gene) {
RNA_seq_data<-filter(firstdata, firstdata$ID == x)
RNA_seq_data<- t(RNA_seq_data)
RNA_seq_data<- as.data.frame(RNA_seq_data)
# str(RNA_seq_data)
# colnames(LUAD_Pheno)
RNA_seq_data$submitter_id.samples<- row.names(RNA_seq_data)
colnames(RNA_seq_data)<- c("Expressionvalue","submitter_id.samples")
LUAD_Pheno$submitter_id.samples<- as.character(LUAD_Pheno$submitter_id.samples)
LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
LUAD_Pheno$submitter_id.samples<- sub('-', '.', LUAD_Pheno$submitter_id.samples)#############- replaced by .
finaldata<- inner_join(LUAD_Pheno,RNA_seq_data, by = "submitter_id.samples")
finaldata$PFS_status<- as.character(finaldata$PFS_status)
finaldata$PFS_status<- as.numeric(as.factor(finaldata$PFS_status))
finaldata$Expressionvalue<- as.numeric(as.character(finaldata$Expressionvalue))
finaldata$group<- ifelse(finaldata$Expressionvalue>median(finaldata$Expressionvalue),'high','low')
library(survminer)
library(survival)
fit <- survfit(Surv(finaldata$PFS,finaldata$PFS_status)~finaldata$group, data=finaldata)
summary(fit)
pp<- ggsurvplot(fit, data = finaldata, conf.int = F, pval = TRUE,
xlim = c(0,2000), # present narrower X axis, but not affect
# survival estimates.
xlab = "Time in days", # customize X axis label.
break.time.by = 200) # break X axis in time intervals by 500.
ggsave(filename = paste("plot_",x,".pdf",sep = ""))
print(x)
}