我第一次遇到 nrow(model.matrix(x)) != nrow(x) 的情况,
而 ?model.matrix 的帮助文档中并没有提到这种行为。
是什么导致了这种情况?有没有办法防止它丢弃行?
# Names of the predictor columns of `x` that enter the model.
modelVars <- c(
  "duration", "age", "ahour", "amonth", "aweekend",
  "ayear", "female", "unins", "region", "admithos"
)

# Right-hand side of the formula as text: "duration + age + ... + admithos".
xFormTxt <- paste0(modelVars, collapse = " + ")

# Two-sided model formula with `waittime` as the response.
modelForm <- as.formula(paste0("waittime ~", xFormTxt))

# Design matrix for the predictors only (one-sided formula, no response).
# With R's default na.action (na.omit), rows of `x` that contain an NA in
# any of these predictors are left out, so nrow(xMM) can be < nrow(x).
xMM <- model.matrix(
  as.formula(paste0("~", xFormTxt)),
  x
)
> nrow(x)
[1] 208
> nrow(xMM)
[1] 152
下面是可以重现该问题的样本数据:
x <- structure(list(duration = c(65L, 136L, 110L, 821L, 1520L, 95L,
180L, 458L, 173L, 132L, 250L, 138L, 192L, 53L, 155L, 59L, 237L,
NA, 67L, 82L, 283L, 276L, 357L, 270L, 704L, 110L, 178L, 451L,
90L, 31L, 398L, 168L, 65L, 230L, 519L, 149L, 27L, 362L, 265L,
294L, 233L, 175L, 15L, 255L, 50L, 81L, 0L, NA, 195L, 461L, 299L,
193L, 285L, NA, 79L, 154L, 136L, 218L, 100L, 49L, 257L, 245L,
193L, 79L, 160L, NA, 60L, 259L, 15L, 127L, 406L, 92L, 426L, 577L,
313L, 268L, 83L, 15L, 78L, 162L, 266L, NA, 108L, NA, 65L, 455L,
237L, 320L, 83L, 1346L, NA, 518L, 140L, 951L, 170L, 402L, 129L,
51L, 184L, 391L, 456L, 146L, 491L, NA, 43L, 283L, 71L, NA, 89L,
42L, 142L, 387L, NA, 40L, 147L, 128L, NA, 244L, 233L, 327L, 294L,
NA, 50L, NA, 201L, 104L, 105L, 75L, 114L, 403L, 107L, 213L, 44L,
96L, 283L, 121L, 93L, 98L, 106L, 58L, 151L, 540L, NA, 266L, 257L,
150L, 90L, 105L, 336L, 580L, 261L, 275L, 277L, 192L, 77L, 255L,
140L, 1029L, 188L, NA, 135L, 178L, 523L, 345L, 110L, 216L, 123L,
38L, 363L, 55L, 109L, 585L, NA, 65L, 62L, 127L, 83L, NA, 131L,
290L, 25L, 815L, 96L, 134L, 116L, 40L, 188L, 286L, NA, 401L,
140L, 119L, 36L, 122L, 118L, 4L, 638L, 55L, 424L, 132L, 63L,
79L, 85L, NA, 130L, 312L, 68L, 75L), age = c(39L, 0L, 37L, 35L,
74L, 53L, 25L, 39L, 54L, 85L, 19L, 11L, 49L, 5L, 14L, 62L, 13L,
62L, 41L, 51L, 41L, 83L, 64L, 47L, 27L, 1L, 37L, 1L, 34L, 11L,
41L, 34L, 18L, 13L, 62L, 13L, 35L, 20L, 40L, 90L, 25L, 64L, 0L,
84L, 67L, 29L, 1L, 24L, 55L, 19L, 36L, 18L, 37L, 39L, 59L, 21L,
4L, 32L, 7L, 1L, 0L, 21L, 83L, 15L, 81L, 56L, 24L, 52L, 48L,
20L, 51L, 1L, 63L, 44L, 93L, 50L, 22L, 1L, 41L, 32L, 82L, 26L,
2L, 36L, 33L, 83L, 33L, 37L, 3L, 26L, 7L, 39L, 19L, 30L, 69L,
21L, 16L, 14L, 73L, 75L, 79L, 22L, 85L, 2L, 26L, 88L, 56L, 0L,
13L, 25L, 1L, 19L, 38L, 3L, 83L, 21L, 44L, 33L, 25L, 26L, 93L,
65L, 46L, 27L, 80L, 5L, 81L, 17L, 12L, 20L, 64L, 71L, 39L, 38L,
0L, 47L, 39L, 40L, 44L, 57L, 18L, 39L, 42L, 60L, 36L, 21L, 16L,
39L, 0L, 16L, 22L, 40L, 43L, 76L, 3L, 54L, 30L, 24L, 10L, 23L,
90L, 55L, 36L, 66L, 65L, 4L, 3L, 0L, 86L, 18L, 25L, 82L, 30L,
38L, 9L, 0L, 0L, 15L, 69L, 93L, 57L, 57L, 33L, 20L, 37L, 22L,
34L, 13L, 50L, 67L, 88L, 1L, 1L, 23L, 22L, 60L, 68L, 50L, 84L,
72L, 21L, 1L, 19L, 20L, 54L, 72L, 36L, 0L), ahour = c(2250L,
2019L, 1120L, 2102L, 1925L, 1133L, 1535L, 1427L, 1534L, 1352L,
2200L, 1449L, 1335L, 2437L, 1939L, 1639L, 1819L, NA, 550L, 2328L,
2247L, 1230L, 2243L, 1316L, 2252L, 2145L, 2200L, 1139L, 1545L,
716L, 34L, 1627L, 1230L, 1630L, 1451L, 732L, 53L, 2204L, 2435L,
1711L, 1041L, 1040L, 2105L, 1321L, 1330L, 2354L, 1714L, 2045L,
2205L, 2259L, 1556L, 2010L, 1910L, -9L, 1114L, 1501L, 1756L,
2342L, 2120L, 1012L, 1803L, 1925L, 447L, 1605L, 450L, 812L, 2230L,
2048L, 1730L, 1610L, 944L, 948L, 1817L, 1859L, 1828L, 2008L,
1742L, 1835L, 800L, 1433L, 1107L, -9L, 2310L, 1936L, 1635L, 1855L,
2153L, 2010L, 1821L, 1224L, 1050L, 1023L, 810L, 1819L, 809L,
2008L, 1034L, 1426L, 956L, 1529L, 1524L, 1724L, 1304L, 1700L,
1821L, 2027L, 2320L, 1213L, 1206L, 8L, 2033L, 39L, 1150L, 2415L,
2238L, 1340L, 1522L, 1006L, 1932L, 537L, 1455L, NA, 1515L, 1200L,
539L, 855L, 2214L, 1542L, 811L, 2222L, 1359L, 1357L, 2046L, 2122L,
2443L, 1009L, 1315L, 802L, 1032L, 1348L, 1728L, 1841L, -9L, 1521L,
1141L, 12L, 2055L, 1633L, 1247L, 1110L, 1504L, 1920L, 538L, 1133L,
1909L, 454L, 1750L, 151L, 2057L, 255L, 1644L, 1847L, 1110L, 2321L,
620L, 1938L, 2114L, 1642L, 826L, 920L, 1926L, 740L, 1945L, 715L,
1713L, 1658L, 2016L, -9L, 1207L, 910L, 1359L, 2144L, 124L, 2246L,
2358L, 1910L, 932L, 2020L, 1053L, 830L, 1030L, 1528L, 1522L,
1123L, 1902L, 1156L, 604L, 1055L, 1256L, 1748L, 2417L, 233L,
1304L, NA, 2220L, 1423L, 1137L, 348L), amonth = structure(c(7L,
11L, 1L, 11L, 3L, 9L, 1L, 5L, 3L, 1L, 5L, 12L, 3L, 8L, 4L, 5L,
11L, 5L, 2L, 9L, 9L, 4L, 8L, 1L, 6L, 4L, 9L, 4L, 5L, 10L, 3L,
6L, 3L, 5L, 6L, 6L, 2L, 3L, 11L, 1L, 12L, 2L, 9L, 12L, 9L, 1L,
3L, 11L, 4L, 7L, 6L, 4L, 2L, 2L, 12L, 7L, 5L, 7L, 2L, 10L, 1L,
9L, 4L, 12L, 7L, 7L, 8L, 9L, 5L, 8L, 11L, 5L, 5L, 6L, 2L, 9L,
3L, 3L, 1L, 9L, 9L, 6L, 9L, 11L, 11L, 8L, 7L, 3L, 11L, 1L, 12L,
7L, 7L, 11L, 1L, 10L, 10L, 6L, 8L, 12L, 10L, 12L, 3L, 8L, 3L,
11L, 7L, 9L, 4L, 11L, 12L, 11L, 11L, 5L, 11L, 10L, 9L, 11L, 9L,
4L, 11L, 4L, 5L, 4L, 9L, 3L, 1L, 5L, 2L, 6L, 5L, 6L, 7L, 3L,
9L, 1L, 6L, 12L, 8L, 11L, 6L, 6L, 4L, 4L, 12L, 6L, 6L, 3L, 9L,
1L, 1L, 10L, 5L, 9L, 2L, 3L, 3L, 2L, 6L, 10L, 8L, 4L, 4L, 4L,
8L, 9L, 11L, 4L, 8L, 4L, 1L, 7L, 11L, 9L, 5L, 11L, 12L, 1L, 3L,
3L, 2L, 6L, 3L, 4L, 10L, 1L, 3L, 3L, 12L, 11L, 3L, 2L, 4L, 11L,
5L, 12L, 6L, 7L, 4L, 10L, 1L, 1L, 12L, 4L, 7L, 9L, 9L, 3L), .Label = c("January",
"February", "March", "April", "May", "June", "July", "August",
"September", "October", "November", "December"), class = "factor"),
aweekend = c(TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE,
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE,
FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE,
FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE), ayear = c(2005L, 2006L, 2007L, 2007L,
2007L, 2009L, 2005L, 2005L, 2009L, 2006L, 2005L, 2008L, 2007L,
2005L, 2006L, 2007L, NA, 2005L, 2007L, 2006L, 2006L, NA,
2007L, 2009L, NA, 2007L, 2008L, 2005L, 2007L, 2009L, 2007L,
2005L, 2008L, 2006L, 2009L, 2005L, 2007L, 2006L, 2005L, 2009L,
NA, 2006L, 2006L, NA, 2009L, 2008L, 2009L, 2007L, 2005L,
2005L, NA, 2008L, 2007L, 2007L, 2006L, 2009L, 2007L, 2006L,
2006L, NA, NA, 2008L, 2007L, 2006L, 2008L, 2008L, 2009L,
2008L, 2008L, 2008L, 2005L, 2005L, 2009L, 2009L, 2006L, 2006L,
2005L, 2006L, 2006L, 2009L, 2006L, 2007L, 2006L, 2007L, 2007L,
2006L, NA, 2006L, NA, 2009L, 2005L, NA, 2007L, 2005L, 2007L,
2006L, 2007L, 2006L, 2008L, 2005L, 2009L, 2007L, 2006L, 2008L,
2005L, 2007L, 2008L, 2007L, 2009L, 2009L, 2006L, NA, 2009L,
2005L, 2006L, 2005L, 2006L, 2008L, 2005L, NA, NA, 2005L,
2009L, 2007L, 2008L, NA, NA, 2009L, 2006L, 2008L, NA, 2008L,
2005L, 2009L, 2005L, 2007L, 2008L, NA, 2005L, 2006L, 2007L,
2006L, 2009L, 2009L, 2007L, 2008L, 2006L, NA, NA, 2005L,
NA, 2007L, 2005L, 2007L, 2005L, 2007L, NA, NA, 2007L, 2005L,
2009L, 2008L, 2009L, NA, NA, 2009L, 2008L, 2005L, 2007L,
2006L, 2007L, 2006L, 2009L, 2006L, NA, 2006L, 2007L, 2009L,
2008L, 2005L, 2009L, 2009L, 2005L, 2008L, 2009L, 2005L, 2008L,
2008L, 2005L, NA, NA, 2008L, NA, 2009L, 2006L, 2008L, 2006L,
2008L, 2008L, 2005L, 2006L, NA, 2006L, 2005L, 2007L, 2005L,
2009L, 2009L), female = c(FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE,
FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE,
FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE,
FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE,
TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE,
TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE,
TRUE, TRUE), unins = c(FALSE, FALSE, FALSE, TRUE, TRUE, FALSE,
FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, NA, FALSE, NA, TRUE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, FALSE, TRUE,
FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, NA, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
NA, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, NA, NA, FALSE, TRUE, NA, FALSE, TRUE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE), region = structure(c(4L, 3L, 3L, 1L, 4L, 1L, 4L,
3L, 1L, 4L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L,
3L, 1L, 1L, 1L, 3L, 4L, 1L, 3L, 1L, 4L, 4L, 1L, 3L, 4L, 3L,
3L, 1L, 1L, 3L, 2L, 3L, 1L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 4L,
1L, 3L, 4L, 3L, 3L, 4L, 1L, 4L, 1L, 2L, 3L, 1L, 2L, 1L, 3L,
3L, 4L, 3L, 1L, 1L, 4L, 3L, 2L, 3L, 3L, 2L, 2L, 4L, 2L, 2L,
4L, 4L, 2L, 4L, 3L, 1L, 1L, 2L, 1L, 1L, 3L, 3L, 3L, 2L, 2L,
1L, 2L, 2L, 3L, 1L, 4L, 4L, 2L, 4L, 3L, 4L, 3L, 2L, 3L, 2L,
4L, 3L, 3L, 1L, 4L, 2L, 3L, 3L, 1L, 4L, 2L, 1L, 2L, 1L, 2L,
3L, 2L, 1L, 4L, 4L, 3L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L,
2L, 2L, 1L, 2L, 3L, 2L, 1L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 1L, 1L, 4L, 3L, 3L, 1L, 3L, 1L, 3L, 4L, 2L, 1L, 1L, 3L,
4L, 2L, 2L, 4L, 4L, 1L, 3L, 3L, 3L, 1L, 4L, 2L, 2L, 4L, 3L,
1L, 1L, 3L, 3L, 1L, 3L, 1L, 2L, 3L, 3L, 3L, 1L, 2L, 2L, 2L,
3L, 4L, 4L, 2L, 2L, 3L), .Label = c("Northeast", "Midwest",
"South", "West"), class = "factor"), admithos = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor")), .Names = c("duration", "age",
"ahour", "amonth", "aweekend", "ayear", "female", "unins", "region",
"admithos"), class = "data.frame", row.names = c(NA, -208L))
答案 0 :(得分:4)
model.matrix() 只保留能够用于拟合您所指定模型的行。
在这种情况下,它不知道您希望如何在它生成的设计矩阵中用数值来表示 NA,
因此会删除在任何一个解释变量或响应变量列中包含 NA 的行
(model.matrix() 内部会调用 model.frame(),而其默认的 na.action 是 na.omit)。
在这种情况下,您可以通过对 x 运行 complete.cases() 来验证这一点:
# Count the rows of x that have no NA in any column.
sum(complete.cases(x))
# [1] 152
如果您希望这些被丢弃的行也进入模型矩阵,可以考虑先将 NA 替换为某种插补(imputation)值。