使用R I对某些数据进行有序的逻辑回归。我需要使用拟合值来预测未来值。我有一个数据集,它有10个不同的参数组合,我想预测“结果”的概率。因变量(模型适合)。我已经使用过&split;' split'分成10个独立的数据帧,我想尝试对每个输出的数据帧运行预测语句。有人可以告诉我如何: 一个。创建一个运行预测语句的函数,和 湾通过10个数据帧循环功能? C。输出三个结果条件的预测概率,并将它们绑定到相关的输入数据帧
以下是我尝试的内容:
# Fit the model
library(MASS)
fit <- polr(Result ~ State + score1+ score2 + history + pastdefault, data = dat, Hess=TRUE)
## Model Summary
summary(fit)
#Take the new data which doesn't have predictions
t13<- dat[ which(dat$Tranche==13), ]
out <- split( t13 , f = r13$Gamen )
#Function
pred <- function(Result, State, score1, score2, history, pastdefault,...){
cbind(out, predict(fit, out, type = "probs"))}
#Use sapply to run prediction
pred.out <- sapply(out, pred)
除了建议之外,我还尝试运行预测声明,以防止将数据拆分为&#39; out&#39;根据以下代码:
#Predict against 'out' dataframes
newdat1 <- cbind(out, predict(fit, newdata=out, type = "probs"))
但是我得到了相同的错误消息:
<\ t> in eval(expr,envir,enclos):object&#39; State&#39;找不到
这里是使用head(数据)的数据:
State score1 score2 Salaries history exp moody Result pastdefault Tranche
1 Minnesota 6.000 17.5 35.948 9 579 1085 moreinfo yes 1
2 New Mexico 4.586 17.2 28.493 11 530 1015 fail No 1
3 Washington 5.906 20.2 36.151 48 494 937 fail No 1
4 West Virginia 6.107 14.8 31.944 17 484 932 moreinfo No 1
5 Louisiana 4.761 16.8 26.461 9 535 1021 pass yes 2
6 Oklahoma 4.845 15.5 28.172 9 536 1027 pass yes 2
Gamen
1 1
2 2
3 3
4 4
5 1
6 2
以下是dput(data)
的结果:
ture(list(State = structure(c(23L, 31L, 47L, 48L, 18L, 36L,
39L, 42L, 13L, 14L, 16L, 35L, 50L, 1L, 20L, 27L, 30L, 7L, 9L,
34L, 43L, 3L, 4L, 19L, 25L, 26L, 37L, 8L, 29L, 38L, 2L, 21L,
32L, 6L, 10L, 11L, 15L, 24L, 5L, 17L, 33L, 40L, 41L, 46L, 49L,
28L, 12L, 22L, 45L, 32L, 9L, 5L, 44L, 13L, 38L, 19L, 25L, 26L,
37L, 8L), .Label = c("Alabama", "Alaska", "Arizona", "Arkansas",
"California", "Colorado", "Connecticut", "Delaware", "Florida",
"Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
"Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
"Michigan", "Minnesota", "Mississippi", "Missouri", "Montana",
"Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
"New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma",
"Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
"Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
"West Virginia", "Wisconsin", "Wyoming"), class = "factor"),
score1 = c(6, 4.586, 5.906, 6.107, 4.761, 4.845, 7.469, 4.388,
6.136, 5.826, 5.817, 6.162, 6.16, 4.405, 7.245, 5.935, 9.774,
8.817, 5.718, 4.775, 5.222, 4.778, 4.459, 6.428, 5.383, 5.692,
6.436, 7.03, 5.859, 7.109, 8.963, 7.287, 9.623, 5.443, 5.193,
6.078, 5.483, 4.08, 4.992, 5.217, 5.077, 4.797, 4.775, 5.327,
6.93, 5.16, 4.21, 6.994, 6.75, 5.859, 7.109, 8.963, 3.656,
7.287, 9.623, 5.443, 6.078, 5.483, 4.08, 4.992), score2 = c(17.5,
17.2, 20.2, 14.8, 16.8, 15.5, 14.7, 18.6, 17.3, 17.5, 15.1,
16.6, 14.9, 17.2, 17, 14.5, 13.8, 14.4, 19.1, 15.3, 15.7,
19.3, 17.1, 13.8, 15.5, 16.3, 19.9, 16.6, 15.6, 17.1, 17.6,
14.8, 15.2, 18.4, 16.3, 17.9, 15.8, 17.5, 24, 17, 16.2, 16.4,
14.4, 14.6, 15.9, 18.7, 19.1, 20.1, 13.8, 16.6, 15.6, 17.1,
24.3, 17.6, 14.8, 15.2, 24, 17, 16.2, 14.4), Salaries = c(35.948,
28.493, 36.151, 31.944, 26.461, 28.172, 40.729, 32.477, 39.431,
36.785, 34.652, 36.802, 31.285, 31.144, 40.661, 30.922, 46.087,
50.045, 32.588, 26.327, 31.223, 32.175, 28.934, 31.972, 31.189,
28.785, 38.555, 39.076, 34.72, 44.51, 47.951, 40.795, 47.612,
34.571, 32.291, 38.518, 31.511, 26.818, 41.078, 32.257, 30.793,
30.279, 25.994, 33.987, 37.746, 34.836, 29.783, 41.895, 35.406,
28.785, 38.555, 39.076, 29.082, 31.511, 26.818, 41.078, 32.257,
25.994, 33.987, 37.746), history = c(9L, 11L, 48L, 17L, 9L,
9L, 70L, 12L, 13L, 58L, 9L, 23L, 10L, 8L, 64L, 9L, 70L, 81L,
48L, 5L, 47L, 27L, 6L, 68L, 9L, 21L, 51L, 68L, 70L, 70L,
47L, 80L, 74L, 29L, 65L, 57L, 5L, 4L, 45L, 11L, 60L, 58L,
5L, 65L, 9L, 30L, 15L, 11L, 68L, 45L, 11L, 60L, 4L, 58L,
5L, 65L, 9L, 5L, 65L, 80L), exp = c(579L, 530L, 494L, 484L,
535L, 536L, 463L, 543L, 560L, 467L, 557L, 515L, 525L, 538L,
479L, 556L, 478L, 477L, 469L, 592L, 474L, 496L, 523L, 469L,
550L, 536L, 499L, 468L, 491L, 461L, 489L, 477L, 473L, 518L,
448L, 482L, 583L, 540L, 485L, 522L, 454L, 443L, 563L, 468L,
572L, 483L, 511L, 549L, 472L, 526L, 528L, 530L, 563L, 532L,
534L, 536L, 538L, 540L, 542L, 544L), moody = c(1085L, 1015L,
937L, 932L, 1021L, 1027L, 888L, 1040L, 1048L, 882L, 1060L,
975L, 1001L, 1029L, 909L, 1050L, 898L, 908L, 889L, 1107L,
893L, 944L, 1005L, 896L, 1045L, 1009L, 947L, 897L, 935L,
880L, 934L, 907L, 892L, 980L, 854L, 889L, 1099L, 1036L, 902L,
999L, 865L, 844L, 1068L, 896L, 1073L, 917L, 979L, 1033L,
901L, 1002L, 1007L, 1011L, 1076L, 1016L, 900L, 760L, 1029L,
850L, 1038L, 990L), Result = structure(c(3L, 2L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 2L, 4L, 3L, 3L, 3L, 3L,
4L, 4L, 2L, 3L, 3L, 3L, 4L, 2L, 4L, 2L, 4L, 2L, 3L, 2L, 4L,
3L, 4L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 3L, 4L, 2L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"fail", "moreinfo", "pass"), class = "factor"), pastdefault = structure(c(2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L), .Label = c("No",
"yes"), class = "factor"), Tranche = c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L), Gamen = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L)), .Names = c("State", "score1", "score2",
"Salaries", "history", "exp", "moody", "Result", "pastdefault",
"Tranche", "Gamen"), class = "data.frame", row.names = c(NA,
-60L))