在臨床研究統(tǒng)計(jì)分析中,臨床基線表對于展示研究數(shù)據(jù)的信息和結(jié)構(gòu)尤為重要。
本文以R語言中自帶臨床數(shù)據(jù)集lung為例,展示基本的COX回歸與臨床基線表的制作痊末。
首先是數(shù)據(jù)的生存分析
#獲取R自帶數(shù)據(jù)集lung
lung <- lung
#查看數(shù)據(jù)的基本結(jié)構(gòu)
head(lung)
# 如下所示:
# inst time status age sex ph.ecog ph.karno pat.karno
# 1 3 306 2 74 1 1 90 100
# 2 3 455 2 68 1 0 90 90
# 3 3 1010 1 56 1 0 90 90
# 4 5 210 2 57 1 1 90 60
# 5 1 883 2 60 1 0 100 90
# 6 12 1022 1 74 1 1 50 80
# meal.cal wt.loss
# 1 1175 NA
# 2 1225 15
# 3 NA 15
# 4 1150 11
# 5 NA 0
# 6 513 0
#----------------------生存分析(Kaplan-Meier曲線)----------------------#
# 基本流程:
# 1)Surv()提取生存時間
# 2)如果不分組蚕苇,就用survfit()擬合模型
# 3)如果有分組哩掺,就用survdiff()分析差異
# 1)Surv()提取生存時間
Lusurv <- Surv(time = lung$time,event = lung$status)
# 2)不分組,就用survfit()擬合模型并作圖
Lufit <- survfit(Lusurv~1)
plot(Lufit)
# 3)有分組涩笤,就用survdiff()分析分組差異再作圖
Lufit <- survfit(Lusurv~lung$sex)
plot(Lufit,conf.int = "none",
col = c("red","blue"),
lwd = 2,xlab = "Time(month)",ylab = "Survival Probability",
mark.time = T)
abline(h=0.5,lty=3) #中位生存時間線
legend("bottomleft",c("Male","Female"),
col = c("red","blue"),lwd = 2)
# 查看不同性別生存時間是否差異顯著
survdiff(Lusurv~lung$sex)
#如下:
# Call:
# survdiff(formula = Lusurv ~ lung$sex)
#
# N Observed Expected (O-E)^2/E (O-E)^2/V
# lung$sex=1 138 112 91.6 4.55 10.3
# lung$sex=2 90 53 73.4 5.68 10.3
#
# Chisq= 10.3 on 1 degrees of freedom, p= 0.001
#可以看出p= 0.001 嚼吞,不同性別生存時間有顯著差異
然后是單因素COX回歸
#-----------------單因素COX回歸-----------------#
# 提取生存時間
Lusurv <- Surv(time = lung$time,event = lung$status)
# 將生存時間加入原先的lung數(shù)據(jù)(dataframe)
lung$Lusurv <- with(lung,Lusurv)
#單因素cox回歸(性別)
cox_sex <- coxph(Lusurv~sex,data = lung)
# 我們需要的是風(fēng)險比,95%置信區(qū)間和pvalue
cox_sexSum <- summary(cox_sex)
cox_sexSum$coefficients # exp(coef) 即風(fēng)險比
# coef exp(coef) se(coef) z Pr(>|z|)
# sex -0.531 0.588 0.167 -3.18 0.00149
cox_sexSum$conf.int # lower .95 upper .95 即置信區(qū)間
# exp(coef) exp(-coef) lower .95 upper .95
# sex 0.588 1.7 0.424 0.816
# paste0d的collapse參數(shù)蹬碧,可以把這些字符串拼成一個長字符串舱禽,而不是放在一個向量中
HR <- round(cox_sexSum$coefficients[,2],2)
PValue <- round(cox_sexSum$coefficients[,5],3) #保留3位
CI <- paste0(round(cox_sexSum$conf.int[,3:4],2),collapse = '-')
Unicox <- data.frame("Characteristics" = "sex",
"Hazard Ratio" = HR,
"CI95" = CI,
"P Value" = PValue)
#----------自定義制作表格函數(shù)Unicoxtable()---------#
Unicoxtable <- function(x){
FML <- as.formula(paste0("Lusurv~",x))
cox_sex <- coxph(FML,data = lung)
cox_sexSum <- summary(cox_sex)
HR <- round(cox_sexSum$coefficients[,2],2)
PValue <- round(cox_sexSum$coefficients[,5],3) #保留3位
CI <- paste0(round(cox_sexSum$conf.int[,3:4],2),collapse = '-')
Unicox <- data.frame("Characteristics" = x,
"Hazard Ratio" = HR,
"CI95" = CI,
"P Value" = PValue)
return(Unicox)
}
#將變量名輸入即可,例如age
Unicoxtable("age")
#結(jié)果如下:
# Characteristics Hazard.Ratio CI95 P.Value
# 1 age 1.02 1-1.04 0.042
#---------多個變量同時處理--------#
library(plyr)
library(xlsx)
Varname <- colnames(lung)[-c(2:3,11)] #提取部分變量名
Univar <- lapply(Varname, Unicoxtable)
Univar <- ldply(Univar,data.frame)
# 將pvalue小于0.001的特殊顯示
Univar$P.Value[Univar$P.Value == 0] <- "<0.001"
#最后輸出文件
write.xlsx(Univar,"Unicoxtable.xlsx",
col.names = T,row.names = F,
showNA = F)
接下來是多因素COX回歸
#----------------------多因素COX回歸---------------------#
# 篩選pvalue小于0.05的變量用于后續(xù)多因素分析
Univar$Characteristics[Univar$P.Value<0.05]
fml <- as.formula(paste0("Lusurv~",paste0(Univar$Characteristics[Univar$P.Value<0.05],collapse = "+")))
MultiCox <- coxph(fml,data = lung)
MultiSum <- summary(MultiCox)
MultiName <- as.character(Univar$Characteristics[Univar$P.Value<0.05])
MHR <- round(MultiSum$coefficients[,2],2)
MPValue <- round(MultiSum$coefficients[,5],3)
MCILow <- round(MultiSum$conf.int[,3],2)
MCIUp <- round(MultiSum$conf.int[,4],2)
MCI <- paste0(MCILow,"-",MCIUp)
MulCox <- data.frame("Characteristics" = MultiName,
"Hazard Ratio" = MHR,
"CI95" = MCI,
"P Value" = MPValue)
MulCox$P.Value[MulCox$P.Value == 0] <- "<0.001"
#輸出結(jié)果
write.xlsx(MulCox,"MulCoxtable.xlsx",
col.names = T,row.names = F,
showNA = F)
#---------------------將單因素和多因素的數(shù)據(jù)框合并---------------------#
# 兩個數(shù)據(jù)庫行不一樣恩沽,按照變量名匹配
outPut <- merge.data.frame(Univar,MulCox,
by="Characteristics",
all = T,sort = T)
#輸出結(jié)果
write.xlsx(outPut,"COXtable.xlsx",
col.names = T,row.names = F,
showNA = F)
至此誊稚,就完成了生存分析、單/多因素COX回歸以及基線表的制作
附詩一首
晚風(fēng)吻盡荷花葉
任我醉倒在池邊
夜夢美人來相伴
乍醒猶思?xì)埦铺?/strong>