我正在尝试使用 REEMtree 运行一个简单的 CART 风格的混合效应模型。我不明白为什么它总是抛出如下错误:
“错误:逻辑索引向量的长度必须为 1 或 1(即列数),而不是 120”
以下是示例数据:
structure(list(avg_ipt = c(14, 8.666666667, 30, 3.857142857,
2.153846154, 2.363636364, 2.071428571, 2.25, 1.636363636, 14,
13.5, 30, 30, 12.5, 30, 30, 2.636363636, 1.588235294, 0.769230769,
2.25, 2.416666667, 1.866666667, 6.5, 30, 14, 14, 14, 9, 3, 1.260869565,
0.965517241, 1.588235294, 1.125, 14, 12, 30, 9.5, 13.5, 9.333333333,
2.5, 2.5, 1.45, 1.6875, 1.666666667, 2.5, 9.333333333, 9.333333333,
30, 12.5, 13.5, 9.333333333, 13, 3.222222222, 1.333333333, 2.444444444,
2.5, 3.714285714, 14, 14, 30, 30, 5.5, 30, 30, 30, 30, 30, 30,
12, 9.5, 30, 10.5, 30, 30, 30, 30, 10, 30, 30, 8, 1, 30, 30,
10, 30, 8.5, 30, 30, 6, 30, 30, 12.5, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30), monthly_expense = c(2234.77, 6434, 0,
170.2, 461.16, 1971.64, 918.45, 1280.06, 2584.93, 5676.95, 1350,
0, 0, 6710.41, 0, 0, 692.01, 2521.26, 3481.25, 535.32, 195.45,
22818.28, 1402.32, 0, 2232.17, 2232.17, 2683.57, 37.13, 525.54,
2695.45, 4178.1, 1979.42, 1348.62, 5956.95, 1350, 0, 2325, 2325,
2676.5, 166.41, 1084.69, 3594.95, 1291.39, 663.01, 610.77, 6014.49,
2929.56, 0, 2161.25, 2161.25, 8995, 70.49, 1978.14, 2361.29,
306.35, 994.91, 385.73, 5974.42, 2270.63, 0, 0, 239.04, 0, 0,
0, 0, 0, 0, 183.46, 72.89, 0, 182.93, 0, 0, 0, 0, 523.91, 0,
0, 239.54, 108.63, 0, 0, 192.21, 0, 30.03, 0, 0, 46.12, 0, 0,
177.64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), time.period = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L), id = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2", "3", "4", "5",
"6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27",
"28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38",
"39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
"50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60",
"61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71",
"72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82",
"83", "84", "85", "86", "87", "88", "89", "90", "91", "92", "93",
"94", "95", "96", "97", "98", "99", "100"), class = "factor")), row.names = c(NA,
-120L), class = c("tbl_df", "tbl", "data.frame"))
和代码:
library(REEMtree)
model <- REEMtree(monthly_expense ~ avg_ipt, data = sample_df, random =~1|id)
有人知道为什么会引发错误吗?
答案 0 :(得分:0)
请确保 sample_df 的类是 data.frame,即其 S3 类只包含一个元素 "data.frame",而不是 c("tbl_df", "tbl", "data.frame")。
library(REEMtree)
sample_df <- data.frame(sample_df)
model <- REEMtree(monthly_expense ~ avg_ipt, data = sample_df, random =~1|id)
输出如下:
> model
[1] "*** RE-EM Tree ***"
n= 120
node), split, n, deviance, yval
* denotes terminal node
1) root 120 670354400 1166.627 *
[1] "Estimated covariance matrix of random effects:"
(Intercept)
(Intercept) 2474326
[1] "Estimated variance of errors: 5679196.50339722"
[1] "Log likelihood: -1098.26101212641"
REEMtree 中出现这个问题,是因为下面这一行会根据 data 是普通数据框还是 tbl_df 而给出不同的 Target 对象(随后代码中的这一差异导致了错误):
Target <- data[, toString(TargetName)]
如果 data 是普通数据框,则 Target 是普通向量;但如果 data 是 tbl_df,则 Target 也是一个 tbl_df。
这将解决该问题:
Target <- data[, toString(TargetName), drop = TRUE]
或者,由于 toString(TargetName) 只能是标量,也可以写成:
Target <- data[[toString(TargetName)]]