# ROC 曲線(xiàn)——科研貓
數(shù)據(jù)解釋
第一列為二分類(lèi)結(jié)局變量,第2~n列為模型預(yù)測(cè)概率雅倒。
代碼如下:
options(stringsAsFactors = F)
options(warn = -1)
# 安裝包
#install.packages("pROC")
#install.packages("wesanderson")
#install.packages("openxlsx")
# 加載包
library(pROC)
library(wesanderson)
library(openxlsx)
# 數(shù)據(jù)要求,第一列為 y,后面為 x_1~x_n
data = read.xlsx("data.xlsx",sheet=1,rowNames = F,colNames = T,startRow = 1,
? ? ? ? ? ? ? ? detectDates = F,na.strings = "#NA")
# check groups (should be <= 5)
num.value = ncol(data) - 1
if(num.value > 5){
? print("Error: Too many groups of observations!")
? q(save="no")
}
name.value = colnames(data)[2:ncol(data)]
# define favoritable colors
col = wes_palette("Darjeeling1",num.value,type=c("discrete"))
# ROC curve
pdf("1.ROC_Combined.pdf",height = 4,width = 4)
for (i in 1:num.value){
? if(i == 1){
? ? roc.data = roc(data[,1],data[,i+1],
? ? ? ? ? ? ? ? ? percent=T,plot=T, grid=T,lty=i,
? ? ? ? ? ? ? ? ? print.auc=F,col=col[i])
? ? text(30,50,"AUC",font = 2,col="darkgray")
? ? text(30,50-10*i,
? ? ? ? paste(name.value[i],":",sprintf("%0.4f",as.numeric(roc.data$auc))),
? ? ? ? col=col[i])
? }else{
? ? roc.data = roc(data[,1],data[,i+1],
? ? ? ? ? ? ? ? ? percent=T,plot=T, grid=T, add=T,lty=i,
? ? ? ? ? ? ? ? ? print.auc=F,col=col[i])
? ? text(30,50-10*i,
? ? ? ? paste(name.value[i],":",sprintf("%0.4f",as.numeric(roc.data$auc))),
? ? ? ? col=col[i])
? }
}
dev.off()
# smooth ROC curve
pdf("2.ROC_with_smooth_curve_Combined.pdf",height = 4,width = 4)
for (i in 1:num.value){
? if(i == 1){
? ? roc.data = roc(data[,1],data[,i+1],
? ? ? ? ? ? ? ? ? percent=T,plot=T, grid=T,lty=i,smooth = T,
? ? ? ? ? ? ? ? ? print.auc=F,col=col[i])
? ? text(30,50,"AUC",font = 2,col="darkgray")
? ? text(30,50-10*i,
? ? ? ? paste(name.value[i],":",sprintf("%0.4f",as.numeric(roc.data$auc))),
? ? ? ? col=col[i])
? }else{
? ? roc.data = roc(data[,1],data[,i+1],
? ? ? ? ? ? ? ? ? percent=T,plot=T, grid=T, add=T,lty=i,smooth = T,
? ? ? ? ? ? ? ? ? print.auc=F,col=col[i])
? ? text(30,50-10*i,
? ? ? ? paste(name.value[i],":",sprintf("%0.4f",as.numeric(roc.data$auc))),
? ? ? ? col=col[i])
? }
}
dev.off()
## ROC with Confidence intervals ##
transparentColor <- function(col,alpha=200){
? # alpha is an integer >= 1 and <= 255
? col.rgb = as.numeric(col2rgb(col))
? col.rgb.alpha = rgb(col.rgb[1],col.rgb[2],col.rgb[3],alpha=alpha,maxColorValue = 255)
? return(col.rgb.alpha)
}
## CI of the curve
# curve shape
pdf("3.ROC_with_confidence_level_ribbon.pdf",height = 3,width = 3*num.value)
par(mfrow=c(1,num.value))
for (i in 1:num.value){
? roc.data = roc(data[,1],data[,i+1],
? ? ? ? ? ? ? ? percent=T,plot=T, grid=T,
? ? ? ? ? ? ? ? print.auc=F,col=transparentColor("white",255))
? sens.ci <- ci.se(roc.data, boot.n=100,conf.level = 0.95,specificities=seq(0, 100, 5))
? plot(sens.ci,type="shape",col=transparentColor(col[i],100))
? text(30,40,"AUC",font = 2,col="darkgray")
? text(30,30,
? ? ? paste(name.value[i],":",sprintf("%0.4f",as.numeric(roc.data$auc))),
? ? ? col=col[i])
}
dev.off()
#curve bar
pdf("4.ROC_with_confidence_level_bars_Combined.pdf",height = 3,width = 3*num.value)
par(mfrow=c(1,num.value))
for (i in 1:num.value){
? roc.data = roc(data[,1],data[,i+1],
? ? ? ? ? ? ? ? percent=T,plot=T, grid=T,
? ? ? ? ? ? ? ? print.auc=F,col=col[i])
? sens.ci <- ci.se(roc.data, boot.n=100,conf.level = 0.95,specificities=seq(0, 100, 5))
? plot(sens.ci,type="bars",col=col[i])
? text(30,40,"AUC",font = 2,col="darkgray")
? text(30,30,
? ? ? paste(name.value[i],":",sprintf("%0.4f",as.numeric(roc.data$auc))),
? ? ? col=col[i])
}
dev.off()
## Performance ##
getROCPerformance <- function(rocdata){
? performance = coords(roc.data, "best", best.method = "youden",
? ? ? ? ? ? ? ? ? ? ? ret=c("threshold", "sensitivity","specificity",
? ? ? ? ? ? ? ? ? ? ? ? ? ? "npv","ppv","tpr","fpr",
? ? ? ? ? ? ? ? ? ? ? ? ? ? "tnr","fnr","fdr","accuracy",
? ? ? ? ? ? ? ? ? ? ? ? ? ? "precision","youden"),
? ? ? ? ? ? ? ? ? ? ? transpose = F)
? res = t(as.data.frame(performance))
? auc = as.numeric(roc.data$auc)
? res = rbind(auc,res)
? return(res)
}
perf.all = NULL
roc.data.all = NULL
for (i in 1:num.value){
? roc.data = roc(data[,1],data[,i+1],
? ? ? ? ? ? ? ? percent=T,plot=F)
? roc.data.all = c(roc.data.all,roc.data)
? perf = getROCPerformance(roc.data)
? colnames(perf) = name.value[i]
? perf.all = cbind(perf.all,perf)
}
perf.all = as.data.frame(perf.all)
perf.all$Index = c("AUC","Best Cut-off Value","Sensitivity","Specificity",
? ? ? ? ? ? ? ? ? "Negative Predictive Value","Positive Predictive Value",
? ? ? ? ? ? ? ? ? "True Positive Rate","False Positive Rate",
? ? ? ? ? ? ? ? ? "True Negatice Rate","False Negative Rate",
? ? ? ? ? ? ? ? ? "False Discovery Rate","Accuracy","Precision","Youden Index")
perf.all = perf.all[,c(ncol(perf.all),1:(ncol(perf.all)-1))]
write.table(perf.all,"5.Model.Performance.csv",row.names = F,col.names = T,quote=F,sep=",")
## Delong Comparisons ##
pair = t(combn(2:ncol(data),2))
compair.result = NULL
for (i in 1:nrow(pair)){
? name1 = colnames(data)[pair[i,1]]
? compair.result$Model1.ROC = c(compair.result$Model1.ROC,name1)
? roc1 = roc(data[,1],data[,pair[i,1]],
? ? ? ? ? ? percent=T,plot=F)
? name2 = colnames(data)[pair[i,2]]
? compair.result$Model2.ROC = c(compair.result$Model2.ROC,name2)
? roc2 = roc(data[,1],data[,pair[i,2]],
? ? ? ? ? ? percent=T,plot=F)
? p = roc.test(roc1, roc2, reuse.auc=FALSE, boot.n = 1000, boot.stratified = F)$p.value
? compair.result$Pvalue.Delong.test = c(compair.result$Pvalue.Delong.test,p)
}
compair.result = as.data.frame(compair.result)
write.table(compair.result,"6.Delong.Comparision.csv",row.names = F,col.names = T,quote=F,sep=",")
print('finish')