獲取表達(dá)矩陣闯两,處理TCGA的count數(shù)據(jù),1表示為行拱烁。
exp = exp[apply(exp, 1, function(x) sum(x > 1) > 9), ]
dim(exp)
exp[1:4,1:4]
exp = as.matrix(exp)
導(dǎo)入數(shù)據(jù)
library(rio)
count <- import("GSE168184_GEO_submission_FPKM.txt",
format = "\t")
加 ENTREZID列生蚁,用于富集分析(symbol轉(zhuǎn)entrezid,然后inner_join)
library(clusterProfiler)
library(org.Hs.eg.db)
s2e <- bitr(deg$symbol,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)#人類
dim(deg)
library(dplyr)
deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
dim(deg)
轉(zhuǎn)化空格為NA
clinical[clinical==""] = NA
用花花的專屬TCGA包戏自,ID進(jìn)行轉(zhuǎn)換
rm(list=ls())
load("TCGA-CHOL_gdc.Rdata")
library(tinyarray)
k = tinyarray::trans_exp(exp)
把空著的值改為NA
#簡(jiǎn)化meta的列名
colnames(meta)=c('ID','event','death','last_followup','race','age','gender','stage')
#空著的值改為NA
meta[meta==""]=NA
以病人為中心邦投,表達(dá)矩陣按病人ID去重復(fù)
# 以病人為中心,表達(dá)矩陣按病人ID去重復(fù)
k = !duplicated(str_sub(colnames(exprSet),1,12));table(k)
exprSet = exprSet[,k]
去除重復(fù)
an = gtf_gene[,c("gene_name","gene_id","gene_type")]
exp = exp[rownames(exp) %in% an$gene_id,]
an = an[match(rownames(exp),an$gene_id),]
identical(an$gene_id,rownames(exp))
k = !duplicated(an$gene_name);table(k)
an = an[k,]
exp = exp[k,]
TPM數(shù)據(jù)做單個(gè)基因的生存分析file:///C:/Users/denghuan/Desktop/The%20learning%20of%20R%20software/Practice/%E7%94%9F%E5%AD%98%E5%88%86%E6%9E%90%20survival%20analysis/6.Survival.html
#單個(gè)基因
g = "TRIP13"
meta$gene = ifelse(exprSet[g,]>median(exprSet[g,]),
'high',
'low')
sfit1=survfit(Surv(time, event)~gene, data=meta)
ggsurvplot(sfit1,pval =TRUE, data = meta,
risk.table = TRUE)
ggsurvplot(sfit1,palette = c("#E7B800", "#2E9FDF"),
risk.table =TRUE,pval =TRUE,
conf.int =TRUE,xlab ="Time in months",
ggtheme =theme_light(),
ncensor.plot = TRUE)
##方法2:生信自學(xué)網(wǎng)
diff=survdiff(Surv(time, event)~gene, data=meta)
pValue=1-pchisq(diff$chisq,df=1)
if(pValue<0.001){
pValue=paste0("=",round(pValue,4))
}else{
pValue=paste0("=",round(pValue,3))
}
surPlot=ggsurvplot(sfit1,
data=meta,
conf.int=TRUE,
pval=paste0("p",pValue),
pval.size=6,
risk.table=T,
legend.labs=c("high","low"),
legend.title=paste0(g," level"),
xlab="Time(years)",
break.time.by = 1,
risk.table.title="",
palette=c("red", "blue"),
risk.table.height=.25)
print(surPlot)
stringr::str_replace_all()
str_detect(colnames(exp),"TCGA-W5-AA2R")