書接上回:http://www.reibang.com/p/c7597357a915
ER相關(guān)的基因
endoplasmic reticulum stress
gs = rio::import("GeneCards-SearchResults.csv")
k = gs$`Relevance score`>=7;table(k)
## k
## FALSE TRUE
## 6267 802
gs = gs[k,]
兩種算法計算每個基因的p值肢藐,并取交集
4個數(shù)據(jù)需要循環(huán)流译,先拿其中一個數(shù)據(jù)跑通豁陆,再把代碼寫入循環(huán)
library(tinyarray)
load("../1_data_pre/exp_surv1.Rdata")
exp1 = exp1[rownames(exp1) %in% gs$`Gene Symbol`,]
c1 = surv_cox(exp1,surv1,continuous = T)
head(c1)
## coef se z p HR HRse HRz
## UBE2J2 1.4404365 0.13446765 10.712141 0.000000e+00 4.222539 0.5677949 5.675533
## RER1 1.8780151 0.11813991 15.896534 0.000000e+00 6.540509 0.7726952 7.170369
## ICMT 1.0621919 0.11658641 9.110770 0.000000e+00 2.892705 0.3372500 5.612170
## PARK7 0.6545554 0.13911446 4.705157 2.536705e-06 1.924287 0.2676961 3.452746
## H6PD 0.8217466 0.10642461 7.721396 1.154632e-14 2.274469 0.2420595 5.265107
## FBXO6 0.9195681 0.08969749 10.251882 0.000000e+00 2.508207 0.2249799 6.703741
## HRp HRCILL HRCIUL
## UBE2J2 1.382573e-08 3.244252 5.495823
## RER1 7.479573e-13 5.188606 8.244654
## ICMT 1.998047e-08 2.301789 3.635320
## PARK7 5.549107e-04 1.465060 2.527460
## H6PD 1.401079e-07 1.846253 2.802004
## FBXO6 2.031497e-11 2.103840 2.990295
nrow(c1)
## [1] 653
k1 = surv_KM(exp1,surv1)
head(k1) #p值實在太小,逼近0
## UBE2J2 RER1 FBXO6 DDOST CDC42 SELENON
## 0 0 0 0 0 0
length(k1)
## [1] 642
g1 = intersect(rownames(c1),names(k1))
head(g1)
## [1] "UBE2J2" "RER1" "ICMT" "PARK7" "H6PD" "FBXO6"
length(g1)
## [1] 615
循環(huán)
load("../1_data_pre/exp_surv2.Rdata")
load("../1_data_pre/exp_surv3.Rdata")
load("../1_data_pre/exp_surv4.Rdata")
exp = list(exp1,exp2,exp3,exp4)
surv = list(surv1,surv2,surv3,surv4)
g = list()
for(i in 1:4){
exp[[i]] = exp[[i]][rownames(exp[[i]]) %in% gs$`Gene Symbol`,]
c1 = surv_cox(exp[[i]],surv[[i]],continuous = T)
k = surv_KM(exp[[i]],surv[[i]])
g[[i]] = intersect(rownames(c1),names(k))
}
names(g) = c("TCGA","CGGA_array","CGGA","GSE16011")
sapply(g, length)
## TCGA CGGA_array CGGA GSE16011
## 615 413 551 335
v_plot = draw_venn(g,"")
ggplot2::ggsave(v_plot,filename = "venn.png")
4個數(shù)據(jù),兩種算法p<0.05的基因取交集,得到195個基因付翁,用于lasso回歸
g = intersect_all(g)
length(g)
## [1] 195
lasso回歸
使用TCGA的數(shù)據(jù)構(gòu)建模型
library(survival)
x = t(exp1[g,])
y = data.matrix(Surv(surv1$time,surv1$event))
library(glmnet)
set.seed(10210)
cvfit = cv.glmnet(x, y, family="cox")
fit=glmnet(x, y, family = "cox")
coef = coef(fit, s = cvfit$lambda.min)
index = which(coef != 0)
actCoef = coef[index]
lassoGene = row.names(coef)[index]
lassoGene
## [1] "GPX7" "MR1" "SHISA5" "CP" "PPM1L" "DNAJB11"
## [7] "SNCA" "ANXA5" "HFE" "EPM2A" "FKBP14" "BET1"
## [13] "SERPINE1" "CASP2" "BAG1" "SIRT1" "DNAJB12" "ERLIN1"
## [19] "CYP2E1" "CASP4" "SLN" "ERP27" "DDIT3" "ATP2A2"
## [25] "ERP29" "MYH7" "CDKN3" "MAPK3" "MMP2" "NOL3"
## [31] "BRCA1" "PRNP" "MX1" "RNF185" "SREBF2" "GLA"
par(mfrow = c(1,2))
plot(cvfit)
plot(fit,xvar="lambda",label = F)
逐步回歸法
library(My.stepwise)
vl <- lassoGene
dat = cbind(surv1,t(exp1[lassoGene,]))
# My.stepwise.coxph(Time = "time",
# Status = "event",
# variable.list = vl,
# data = dat)
model = coxph(formula = Surv(time, event) ~ HFE + SHISA5 + BRCA1 + EPM2A +
ERLIN1 + GPX7 + SLN + DNAJB11 + MMP2 + NOL3 + CP + ATP2A2 +
GLA + MAPK3 + SREBF2 + CASP2 + SNCA + DDIT3 + BAG1 + ANXA5,
data = dat)
summary(model)$concordance
## C se(C)
## 0.865988496 0.009754695
genes = names(model$coefficients);length(genes)
## [1] 20
library(survminer)
ggforest(model,data = dat)
ggsave("forest.png",width = 10,height = 8)