AUC包 - AUC错误 - r编程

时间:2015-08-12 12:47:45

标签: r machine-learning random-forest auc

我正在尝试使用 R 中的 AUC 包来绘制 AUC 图。我不清楚这个错误的原因,对此也不太熟悉。fit 是已训练好的模型,test 是测试数据:

# Store the class-probability predictions of `fit` for `test` on the test data itself.
# NOTE(review): with type="prob" the result printed further down has one column per
# class, so a multi-column object is assigned to a single column here -- confirm intent.
test$going_to_cross <- predict(fit, test, type="prob") 

# Same prediction call again, kept as a standalone object for the submission table.
prediction <- predict(fit, test, type="prob")
# The `predictions` argument is expanded into one column per class; the printed
# output of head(submit, 5) shows them as predictions.0 and predictions.1.
submit <- data.frame(cust_id = test$cust_id, already_crossed = test$flag_cross_over, predictions = prediction)
# NOTE(review): the path contains a space after "predictions" -- verify it is intentional.
write.csv(submit, file = "../predictions /cross_sell_predictionsRF.csv", row.names = FALSE)

head(submit, 5)

print("predictions")
# The rename is applied to `prediction` only; `submit` was built before this line
# and keeps the predictions.0 / predictions.1 column names.
colnames(prediction) <- c("predictiona", "predictionb")
head(prediction)
# NOTE(review): `submit` is created above without a going_to_cross column (that
# column was added to `test`), so this lookup presumably finds nothing -- confirm.
which(submit$going_to_cross == 1)


print("names submit")
names(submit)

# NOTE(review): the assignment below is commented out, so predict_cross is not
# defined anywhere in this snippet; head() will fail unless it exists elsewhere.
#predict_cross <- submit$going_to_cross.0
head(predict_cross, 5)

我在这里得到输出:

    cust_id already_crossed predictions.0   predictions.1
280 14080465    0           0.436   0.564
281 24047747    0           0.218   0.782 
282 10897483    0           0.606   0.394
283 14005276    0           0.448   0.552
284 18488402    0           0.284   0.716

[1] "predictions"

Out[317]:
    predictiona predictionb
280 0.436   0.564
281 0.218   0.782
282 0.606   0.394
283 0.448   0.552
284 0.284   0.716
285 0.104   0.896

调用该包的代码是:

# NOTE(review): the printed `submit` has columns predictions.0 and predictions.1 but
# no column named exactly `predictions`; `$` with that name presumably returns NULL,
# which would match the is.na() warning quoted below. Passing a single column such
# as submit$predictions.1 is the likely fix -- confirm.
auc(sensitivity(submit$predictions, submit$already_crossed))

警告信息是:

警告信息:In is.na(x) : is.na() applied to non-(list or vector) of type 'NULL'
(即:is.na() 被应用于类型为 'NULL' 的非(列表或向量)对象)

更新

# get the data into single vectors
# (each column of `submit` is wrapped in matrix(), giving one-column matrices)
 submit_pred <- matrix(submit$predictions.1)
 submit_cross <- matrix(submit$already_crossed)

 # Bind the two one-column matrices side by side.
 dt <- cbind(submit_pred, submit_cross)
  # NOTE(review): matrix() without dimensions reshapes its input into a single
  # column, so the two-column layout built by cbind() is presumably lost here.
  dt <- matrix(dt)


  # NOTE(review): names() on a matrix labels individual elements, not columns;
  # colnames() on the cbind() result is probably what was intended -- confirm.
  names(dt) <- c("submit_pred", "submit_cross")

 # NOTE(review): `dt` is a matrix at this point and `$` is not defined for
 # matrices, so this call is expected to fail; a data.frame would support `$`.
 # prediction()/performance() are presumably the ROCR functions -- confirm.
 roc_pred <- prediction(dt$submit_pred, dt$submit_cross)
 perf <- performance(roc_pred, "tpr", "fpr")
 plot(perf, col="red")
 # Diagonal reference line (intercept 0, slope 1).
 abline(0,1,col="grey")

获取曲线下面积

performance(roc_pred, "auc")@y.values
head(dt)

1 个答案:

答案 0 :(得分:0)

请尝试将下面的脚本调整为适用于您的数据集(使用 ROCR 包)。

library(ROCR)

# Example dataset with some 0 and some 1 values as actual observations.
# The values are laid out row by row: cust_id, actual class, P(class 0), P(class 1).
dt <- data.frame(matrix(data = c(
  14080465, 0, 0.436, 0.564,
  24047747, 1, 0.218, 0.782,
  10897483, 0, 0.606, 0.394,
  14005276, 0, 0.448, 0.552,
  18488402, 1, 0.284, 0.716
), nrow = 5, ncol = 4, byrow = TRUE))  # TRUE rather than T: T can be reassigned

names(dt) <- c("cust_id", "already_crossed", "predictions.0", "predictions.1")

# Obtain the ROC curve: prediction() takes ONE vector of scores (here the
# probability of class 1) and the vector of actual labels.
roc_pred <- prediction(dt$predictions.1, dt$already_crossed)
perf <- performance(roc_pred, "tpr", "fpr")
plot(perf, col = "red")
abline(0, 1, col = "grey")  # diagonal reference line (intercept 0, slope 1)

# Get the area under the curve (stored in the y.values slot).
performance(roc_pred, "auc")@y.values

您也可以使用您的方法(使用包AUC):

library(AUC)

# Example dataset with some 0 and some 1 values as actual observations.
# The values are laid out row by row: cust_id, actual class, P(class 0), P(class 1).
dt <- data.frame(matrix(data = c(
  14080465, 0, 0.436, 0.564,
  24047747, 1, 0.218, 0.782,
  10897483, 0, 0.606, 0.394,
  14005276, 0, 0.448, 0.552,
  18488402, 1, 0.284, 0.716
), nrow = 5, ncol = 4, byrow = TRUE))  # TRUE rather than T: T can be reassigned

names(dt) <- c("cust_id", "already_crossed", "predictions.0", "predictions.1")

# Build the sensitivity curve once from a single prediction vector and the
# actual classes converted to a factor, then reuse it for the area and the plot.
sens <- sensitivity(dt$predictions.1, as.factor(dt$already_crossed))
auc(sens)
plot(sens)

正如我之前所说,您必须传入单个预测向量。此外,您需要将实际类别(0 和 1)转换为因子,否则 sensitivity 函数会出错。不过,我认为您(按照您的方法)真正想要计算的是:

# Build the ROC curve once from the class-1 probabilities and the actual
# classes (as a factor), then report its area and draw it.
roc_curve <- roc(dt$predictions.1, as.factor(dt$already_crossed))
auc(roc_curve)
plot(roc_curve)