使用 dlply(参考这篇帖子),我可以在 data.frame 的各个子集上生成线性模型列表。现在我有了这个列表,想用这些模型在另一个 data.frame 中生成值。
该列表包含每个 DAY 和 variable 子集对应的模型。我想把这些模型应用到另一个 data.frame 中的相同子集上。例如,对于 DAY == 5 且 variable == Var.1 的子集,模型(y = mx + b)为 value = -4.521869 * Location + 21.315。使用相应子集的模型,我想在另一个 data.frame(例如 dat_rec,其中已有 DAY 和 Location 的条目)中计算 Var.1 的值。
有没有办法把列表中的模型用于另一个 data.frame 中的相同子集?例如,用 DAY == 5 且 variable == Var.1 的模型,为该 data.frame 中所有满足 DAY == 5 且 variable == Var.1 的行(例如不同的 Site)填充值。是否有类似的基于列表的方法,用列表中各模型计算出的值来填充 data.frame?所需的最终结果(即下面的 dat_rec)是一个 data.frame。
# Example measurements: 2 sites x 2 locations x 4 days, with four response
# variables (Var.1 .. Var.4) recorded for each combination.
dat <- data.frame(
  Site = rep(rep(c(32L, 10L), each = 4L), times = 2L),
  Location = rep(0:1, each = 8L),
  DAY = rep(c(5L, 55L, 555L, 5555L), times = 4L),
  Var.1 = c(
    20.9, 20.8, 21.03, 21.36, 21.73, 21.18, 20.73, 21.98,
    21.73, 12.48702448, 12.19642662, 12.33218874,
    11.85626285, 11.88812108, 12.70549981, 11.89587521
  ),
  Var.2 = c(rep(100L, 8L), 90L, 90L, 90L, 91L, 92L, 88L, 89L, 90L),
  Var.3 = c(
    14.47, 14.4, 14.3, 14.14, 14.72, 14.62, 14.14, 14.49,
    10.27287765, 10.27287765, 10.41763527, 10.51725376,
    11.12918753, 10.81166867, 10.80656509, 11.00093898
  ),
  Var.4 = c(
    890.19, 888.9, 889.14, 888.15, 889.57, 888.41, 887.48, 886.87,
    688.15, 698.23, 650.99, 700.01, 699, 689.6, 658.7, 689.99
  )
)
# Attach the packages this step depends on: reshape2 provides melt() and
# plyr provides dlply(); neither is available in a bare R session.
library(reshape2)
library(plyr)

# Reshape `dat` to long format: DAY, Site and Location are kept as id
# columns and every remaining column is stacked into variable/value pairs,
# which is the layout dlply() splits on below.
mdat <- melt(dat, id.vars = c("DAY", "Site", "Location"))

# Fit one linear model value ~ Location for every DAY x variable subset
# (dlply pattern from https://stackoverflow.com/a/1214432/1670053).
# The result is a list of lm objects, one per subset.
models_mdat <- dlply(mdat, c("DAY", "variable"), function(df) {
  lm(value ~ Location, data = df)
})
# Target data in wide (not melted) format that the fitted models should
# fill in. This is a partial example of the desired result: only Var.1 has
# been populated, using the list models for DAY 5 and DAY 55; the other
# variables are still NA placeholders.
dat_rec <- data.frame(
  Site = rep(c(1L, 3L), each = 4L),
  Location = rep(c(0.1, 0.2, 0.3, 0.4), times = 2L),
  DAY = rep(c(5L, 55L), each = 4L),
  Var.1 = c(
    20.8628131, 20.4106262, 19.9584393, 19.5062524,
    20.1097573, 19.2295146, 18.3492719, 17.4690292
  ),
  Var.2 = rep(NA, 8L),
  Var.3 = rep(NA, 8L),
  Var.4 = rep(NA, 8L)
)
# Long (melted) form of the target data: one row per
# Site x Location x DAY x variable. Only the eight Var.1 rows carry a value;
# the rows for Var.2 .. Var.4 are still NA.
dat_rec_melt <- data.frame(
  Site = rep(rep(c(1L, 3L), each = 4L), times = 4L),
  Location = rep(c(0.1, 0.2, 0.3, 0.4), times = 8L),
  DAY = rep(rep(c(5L, 55L), each = 4L), times = 4L),
  variable = factor(
    rep(1:4, each = 8L),
    labels = c("Var.1", "Var.2", "Var.3", "Var.4")
  ),
  value = c(
    20.8628131, 20.4106262, 19.9584393, 19.5062524,
    20.1097573, 19.2295146, 18.3492719, 17.4690292,
    rep(NA_real_, 24L)
  )
)
答案 0 :(得分:2)
我认为您要找的是 predict:
# Predict from every fitted model in the list, each time using all rows of
# dat_rec as the new data; sapply() collects the per-model predictions.
sapply(models_mdat, function(model) predict(model, newdata = dat_rec))
编辑:将预测结果与新数据对齐:
# For each fitted model, attach its predictions for dat_rec to dat_rec as a
# new column `fit`, giving a list with one augmented data.frame per model.
lapply(models_mdat, function(model) {
  predicted <- predict(model, newdata = dat_rec)
  cbind(dat_rec, fit = predicted)
})
答案 1 :(得分:0)
根据 agstudy 提供的信息,predict 正是我要找的、用模型计算值的工具。明确了我是想用 dlply 生成的模型列表把预测值(predictions)更新到 data.frame 中之后,我就更清楚该搜索什么来找到解决方案了。
我在这篇帖子中找到了一个解决方案。要得到想要的结果,需要把模型和数据都组织成列表;然后就可以将 predict 与 mdply 配合使用,最终得到更新后的 data.frame。
# Target data in long (melted) format: Site, Location and DAY identify the
# row, `variable` names the response (Var.1 .. Var.4) and `value` holds it.
# Values exist only for the first eight rows (Var.1); the rest are NA.
dat_rec_melt <- data.frame(
  Site = rep(rep(c(1L, 3L), each = 4L), times = 4L),
  Location = rep(c(0.1, 0.2, 0.3, 0.4), times = 8L),
  DAY = rep(rep(c(5L, 55L), each = 4L), times = 4L),
  variable = gl(4L, 8L, labels = c("Var.1", "Var.2", "Var.3", "Var.4")),
  value = c(
    20.8628131, 20.4106262, 19.9584393, 19.5062524,
    20.1097573, 19.2295146, 18.3492719, 17.4690292,
    rep(NA_real_, 24L)
  )
)
# Split the long-format target data by the same keys that were used to fit
# the models, so each piece corresponds to one DAY x variable subset.
dat_rec_list <- dlply(dat_rec_melt, c("DAY", "variable"))

# Pair models and data by subset name rather than by position. models_mdat
# covers more DAY values than dat_rec_melt, and cbind() silently recycles the
# shorter list, which would pair e.g. the DAY 555 models with the DAY 5 rows.
# dlply() names each list element after its split key, so the names identify
# the subsets present on both sides.
shared_keys <- intersect(names(models_mdat), names(dat_rec_list))
stopifnot(length(shared_keys) > 0L)

# For every matched (model, data) pair, append the model's prediction as a
# new column `pred`; mdply() combines the pieces into a single data.frame.
predictions <- mdply(
  cbind(mod = models_mdat[shared_keys], df = dat_rec_list[shared_keys]),
  function(mod, df) {
    mutate(df, pred = predict(mod, newdata = df))
  }
)