我正在尝试使用 R 中的 AUC 包绘制 AUC 曲线。我不清楚这个错误的原因,对这方面也不熟悉。其中 fit 是训练好的模型,test 是测试数据:
# Score `test` with the fitted model `fit`, export the result and inspect it.
# NOTE(review): judging by the printed output, predict(..., type="prob") returns one
# column per class, so this assignment stores a two-column object inside a single
# column of `test` -- confirm that is intended.
test$going_to_cross <- predict(fit, test, type="prob")
# Same call repeated; this copy is the one that goes into `submit` below.
prediction <- predict(fit, test, type="prob")
# `prediction` has two columns, so data.frame() expands the `predictions` argument
# into `predictions.0` and `predictions.1` (see the printed header); there is no
# single column called `predictions`.
submit <- data.frame(cust_id = test$cust_id, already_crossed = test$flag_cross_over, predictions = prediction)
# NOTE(review): the directory name in this path ends with a space ("predictions ")
# -- verify that this is the real location.
write.csv(submit, file = "../predictions /cross_sell_predictionsRF.csv", row.names = FALSE)
head(submit, 5)
print("predictions")
# Renames the columns of `prediction` only; `submit` was built earlier and keeps
# its own column names.
colnames(prediction) <- c("predictiona", "predictionb")
head(prediction)
# `submit` is built from cust_id, already_crossed and predictions only, so
# submit$going_to_cross is NULL and this call yields an empty result.
which(submit$going_to_cross == 1)
print("names submit")
names(submit)
# NOTE(review): the assignment below is commented out, so `predict_cross` is not
# defined anywhere in the visible code; head() will fail unless it is defined
# elsewhere.
#predict_cross <- submit$going_to_cross.0
head(predict_cross, 5)
我在这里得到输出:
cust_id already_crossed predictions.0 predictions.1
280 14080465 0 0.436 0.564
281 24047747 0 0.218 0.782
282 10897483 0 0.606 0.394
283 14005276 0 0.448 0.552
284 18488402 0 0.284 0.716
[1] "predictions"
Out[317]:
predictiona predictionb
280 0.436 0.564
281 0.218 0.782
282 0.606 0.394
283 0.448 0.552
284 0.284 0.716
285 0.104 0.896
调用 AUC 包的代码是:
# NOTE(review): the printed `submit` has columns `predictions.0` and `predictions.1`
# but no `predictions`; the name is an ambiguous partial match, so `$` returns NULL,
# which is consistent with the is.na() warning about type 'NULL'.
# sensitivity() presumably needs a single score vector (e.g. submit$predictions.1)
# -- confirm against the AUC package documentation.
auc(sensitivity(submit$predictions, submit$already_crossed))
警告信息是:
警告信息:
在 is.na(x) 中:is.na() 应用于类型为 'NULL' 的非(列表或向量)对象
更新
# Get the data into single vectors.
# matrix() without dimensions turns each vector into a one-column matrix.
submit_pred <- matrix(submit$predictions.1)
submit_cross <- matrix(submit$already_crossed)
# Two-column matrix: scores next to the observed labels.
dt <- cbind(submit_pred, submit_cross)
# NOTE(review): calling matrix() again without nrow/ncol collapses the two columns
# into a single column, so the pairing of score and label is lost.
dt <- matrix(dt)
# NOTE(review): names<- on a matrix labels individual elements, not columns
# (colnames<- would), so no `submit_pred` / `submit_cross` columns are created.
names(dt) <- c("submit_pred", "submit_cross")
# NOTE(review): `$` is not defined for atomic matrices, so dt$... raises an error;
# a data.frame would be needed for this access to work.
# prediction() / performance() presumably come from the ROCR package -- TODO confirm.
roc_pred <- prediction(dt$submit_pred, dt$submit_cross)
perf <- performance(roc_pred, "tpr", "fpr")
plot(perf, col="red")
# Diagonal reference line (intercept 0, slope 1).
abline(0,1,col="grey")
performance(roc_pred,"auc")@y.values
head(dt)
答案 0 :(得分:0)
请尝试将下面的脚本改写以适配您的数据集(使用 ROCR 包)。
library(ROCR)

# Example dataset with a mix of 0 and 1 values as the actual observations.
# Columns are built by name so each value is visibly tied to its column,
# instead of relying on a positional matrix fill followed by a rename.
dt <- data.frame(
  cust_id = c(14080465, 24047747, 10897483, 14005276, 18488402),
  already_crossed = c(0, 1, 0, 0, 1),
  predictions.0 = c(0.436, 0.218, 0.606, 0.448, 0.284),
  predictions.1 = c(0.564, 0.782, 0.394, 0.552, 0.716)
)

# Obtain the ROC curve: pass ONE score vector (the probability of class 1)
# together with the observed labels.
roc_pred <- prediction(dt$predictions.1, dt$already_crossed)
perf <- performance(roc_pred, "tpr", "fpr")
plot(perf, col = "red")
abline(0, 1, col = "grey")  # diagonal reference line

# Get the area under the curve. The y.values slot is a list with one entry
# per run, so take the first entry to obtain a plain number.
performance(roc_pred, "auc")@y.values[[1]]
您也可以使用您的方法(使用包AUC):
library(AUC)

# Example dataset with a mix of 0 and 1 values as the actual observations.
# Columns are built by name so each value is visibly tied to its column,
# instead of relying on a positional matrix fill followed by a rename.
dt <- data.frame(
  cust_id = c(14080465, 24047747, 10897483, 14005276, 18488402),
  already_crossed = c(0, 1, 0, 0, 1),
  predictions.0 = c(0.436, 0.218, 0.606, 0.448, 0.284),
  predictions.1 = c(0.564, 0.782, 0.394, 0.552, 0.716)
)

# sensitivity() takes one score vector plus the observed classes as a factor.
# Convert and compute once, then reuse the curve for both the area and the plot.
actual <- as.factor(dt$already_crossed)
sens_curve <- sensitivity(dt$predictions.1, actual)
auc(sens_curve)
plot(sens_curve)
正如我之前所说,您必须传入一个预测向量。此外,您需要将实际类别(0 和 1)转换为因子,否则 sensitivity 函数会报错。不过,我认为您(按您的方法)真正想要计算的是:
# Build the ROC curve once from the class-1 scores and the factor-coded labels,
# then report its area and draw it.
roc_curve <- roc(dt$predictions.1, as.factor(dt$already_crossed))
auc(roc_curve)
plot(roc_curve)