我编写了自己的代码来创建ROC曲线,然后根据定义计算AUC,但是我得到了与其他软件包不同的结果,我不明白我在这里做错了什么。这是我的代码。先感谢您。
ROC.sort <- function(pred,true,vals,decreasing=FALSE){
sorted.index <- sort(vals,index=TRUE,decreasing=decreasing)$ix
sort.pred <- pred[sorted.index]
sort.true <- true[sorted.index]
return(list(sort.pred,sort.true))
}
ROC.ploter <- function(pred,true,add.to.plot=FALSE,color="black"){
index <- which(pred == 1)
vals <- true[index]
n.true <- 1/length(which(vals==1))
n.false <- 1/length(which(vals==0))
points <- data.frame(0,0)
last.point <- c(0,0)
for(i in 1:length(vals)){
if(vals[i] == 0){
next.point <- last.point + c(n.false,0)
last.point <- next.point
points <- rbind(points,next.point)
} else {
next.point <- last.point + c(0,n.true)
last.point <- next.point
points <- rbind(points,next.point)
}
}
if(add.to.plot){
points(x=points[,1],y=points[,2],col=color,type="l")
} else {
plot(x=points[,1],y=points[,2],xlab="False Positives",ylab="True Positives",
main= "ROC",type="l",col=color)
points(x=c(0,1),y=c(0,1),type='l')
}
return(points)
}
find.auc <- function(points){
x <- points[,1]
y <- points[,2]
result <- 0
n <- length(x)
x.prev <- x[1]
y.prev <- y[1]
for(i in 2:n){
x.curr <- x[i]
y.curr <- y[i]
if(x.curr == x.prev){
y.prev <- y.curr
} else {
result <- result + y.prev*(x.curr-x.prev)
x.prev <- x.curr
}
}
return(result)
}
这是一个有效的例子。
library(LiblineaR)
size <- 10000
truth <- rbinom(size,1,.9)
dat <- cbind(truth+rnorm(size),truth*rnorm(size))
training.index <- (1:(floor(2*size/3)))
model <- LiblineaR(data=dat[training.index,],labels=truth[training.index])
predicted <- predict(model,dat[-training.index,],decisionValues=TRUE)
pred.labs <- predicted$predictions
pred.scores <- predicted$decisionValues
sorted <- ROC.sort(pred.labs,truth[-training.index],pred.scores,decreasing=TRUE)
pts <- ROC.ploter(sorted[[1]],sorted[[2]])
auc <- find.auc(pts)
print(auc)
并在其上创建一个带有ROCR ROC曲线的图和我自己在同一图上的ROC曲线。
pred <- prediction(pred.scores, truth[testing.index])
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf, col=rainbow(10))
pts <- ROC.ploter(sorted[[1]],sorted[[2]],add.to.plot=TRUE)