获取曲线下面积

Question

我正在尝试使用 R 中的 AUC 包来绘制 AUC 图。我不清楚这个错误的原因，对此也不太熟悉。fit 是训练好的模型，test 是测试数据：

# Store the class probabilities returned by predict() as a column of `test`.
test$going_to_cross <- predict(fit, test, type="prob") 

# Same prediction call again, kept as a separate object for the submission.
prediction <- predict(fit, test, type="prob")
# Combine id, observed flag and predictions into one data frame.
# NOTE(review): the printed output shows the `predictions` argument expanded
# into two columns, `predictions.0` and `predictions.1`.
submit <- data.frame(cust_id = test$cust_id, already_crossed = test$flag_cross_over, predictions = prediction)
# NOTE(review): the path contains a space before the slash ("predictions /");
# confirm that the directory really has that name.
write.csv(submit, file = "../predictions /cross_sell_predictionsRF.csv", row.names = FALSE)

head(submit, 5)

print("predictions")
# Renames the columns of `prediction` only; `submit` was built before this
# line and keeps its own column names.
colnames(prediction) <- c("predictiona", "predictionb")
head(prediction)
# NOTE(review): `going_to_cross` was added to `test`, not to `submit`, so this
# lookup presumably yields NULL -- verify.
which(submit$going_to_cross == 1)


print("names submit")
names(submit)

# NOTE(review): the assignment below is commented out, so `predict_cross` is
# not defined anywhere in this snippet when head() is called.
#predict_cross <- submit$going_to_cross.0
head(predict_cross, 5)

我在这里得到输出：

    cust_id already_crossed predictions.0   predictions.1
280 14080465    0           0.436   0.564
281 24047747    0           0.218   0.782 
282 10897483    0           0.606   0.394
283 14005276    0           0.448   0.552
284 18488402    0           0.284   0.716

[1] "predictions"

Out[317]:
    predictiona predictionb
280 0.436   0.564
281 0.218   0.782
282 0.606   0.394
283 0.448   0.552
284 0.284   0.716
285 0.104   0.896

使用该包的代码是：

# NOTE(review): `submit` has columns `predictions.0` and `predictions.1` but no
# column named exactly `predictions`; `submit$predictions` is therefore
# presumably NULL (ambiguous partial match), which fits the is.na() warning.
auc(sensitivity(submit$predictions, submit$already_crossed))

警告信息是：

警告信息：In is.na(x) : is.na() 应用于类型为 'NULL' 的非（列表或向量）对象

更新

# Get the class-1 predictions and the observed flags as one-column matrices
 submit_pred <- matrix(submit$predictions.1)
 submit_cross <- matrix(submit$already_crossed)

 # Bind the two columns side by side.
 dt <- cbind(submit_pred, submit_cross)
  # NOTE(review): matrix() without nrow/ncol returns a one-column matrix, so
  # the two columns are stacked into a single column here.
  dt <- matrix(dt)


  # NOTE(review): names() on a matrix sets element names, not column names
  # (colnames() does that), and `dt` is still not a data frame afterwards.
  names(dt) <- c("submit_pred", "submit_cross")

 # NOTE(review): `$` is not valid on a matrix / atomic vector, so the
 # `dt$...` lookups are expected to fail -- verify.
 roc_pred <- prediction(dt$submit_pred, dt$submit_cross)
 # True positive rate against false positive rate.
 perf <- performance(roc_pred, "tpr", "fpr")
 plot(perf, col="red")
 # Line with intercept 0 and slope 1 as a reference.
 abline(0,1,col="grey")

获取曲线下面积

performance(roc_pred, "auc")@y.values
head(dt)

Answer 1

请尝试根据您的数据集调整以下脚本（使用 ROCR 包）：

library(ROCR)

# Example dataset: one row per customer with the observed class
# (`already_crossed`, containing both 0 and 1 values) and the predicted
# probability of each class. Built column-wise so that the names are set at
# construction time and no matrix round-trip is needed.
dt <- data.frame(
  cust_id = c(14080465, 24047747, 10897483, 14005276, 18488402),
  already_crossed = c(0, 1, 0, 0, 1),
  predictions.0 = c(0.436, 0.218, 0.606, 0.448, 0.284),
  predictions.1 = c(0.564, 0.782, 0.394, 0.552, 0.716)
)

# Build the ROCR prediction object from the class-1 scores and the
# observed classes
roc_pred <- prediction(
  predictions = dt$predictions.1,
  labels = dt$already_crossed
)

# ROC curve: true positive rate against false positive rate
perf <- performance(roc_pred, measure = "tpr", x.measure = "fpr")
plot(perf, col = "red")
abline(a = 0, b = 1, col = "grey") # reference line: intercept 0, slope 1

# Area under the ROC curve
performance(roc_pred, measure = "auc")@y.values

您也可以使用您的方法（使用包AUC）：

library(AUC)

# Example dataset: one row per customer with the observed class
# (`already_crossed`, containing both 0 and 1 values) and the predicted
# probability of each class. Built column-wise so that the names are set at
# construction time and no matrix round-trip is needed.
dt <- data.frame(
  cust_id = c(14080465, 24047747, 10897483, 14005276, 18488402),
  already_crossed = c(0, 1, 0, 0, 1),
  predictions.0 = c(0.436, 0.218, 0.606, 0.448, 0.284),
  predictions.1 = c(0.564, 0.782, 0.394, 0.552, 0.716)
)

# Sensitivity curve from the class-1 scores, with the observed classes
# passed as a factor; computed once and reused for the area and the plot
sens_curve <- sensitivity(dt$predictions.1, as.factor(dt$already_crossed))
auc(sens_curve)
plot(sens_curve)

正如我之前所说，您必须传入一个预测向量。此外，您需要将实际类别（0 和 1）转换为因子，否则 sensitivity 函数会出错。不过，我认为您（按照您的方法）真正想要计算的是：

# ROC curve from the class-1 scores, with the observed classes passed as a
# factor; computed once and reused for the area and the plot
roc_curve <- roc(dt$predictions.1, as.factor(dt$already_crossed))
auc(roc_curve)
plot(roc_curve)

AUC包 - AUC错误 - r编程

获取曲线下面积

1 个答案: