我正在重塑(reshape)视频的每分钟时间序列数据,需要把列数限制为每个 id 分组的前 10 行。我不知道在按 `t` 展开时,如何把每个分组内部限制为前 10 行。
问题在于 `t` 可能有 1 到 700 行,而 `reshape` 会把所有行都作为列返回。我只想要前 10 个(即每个分组时间序列的前 10 分钟)。
# Pivot d from long to wide: one row per id, one viewers column per t value.
matrixed <- reshape(
  d,
  direction = "wide",
  idvar = "id",
  timevar = "t"
)
返回一行158列。
以下是您可以在R中重现的示例数据集:
structure(list(id = c("NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL",
"NluslS4RXL", "NluslS4RXL", "NluslS4RXL", "NluslS4RXL"), t = c(0,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
151, 152, 153, 154, 155, 156), viewers = c(4L, 12L, 21L, 36L,
49L, 62L, 84L, 113L, 145L, 203L, 270L, 362L, 419L, 496L, 532L,
702L, 778L, 835L, 963L, 1042L, 1091L, 1159L, 1209L, 1247L, 1278L,
1316L, 1356L, 1399L, 1443L, 1446L, 1441L, 1464L, 1488L, 1504L,
1527L, 1558L, 1583L, 1645L, 1672L, 1724L, 1736L, 1767L, 1800L,
1794L, 1800L, 1828L, 1860L, 1914L, 1942L, 1948L, 1960L, 1972L,
2004L, 2024L, 2062L, 2076L, 2052L, 2059L, 2074L, 2092L, 2119L,
2116L, 2113L, 2132L, 2122L, 2157L, 2171L, 2183L, 2179L, 2183L,
2178L, 2184L, 2176L, 2200L, 2207L, 2205L, 2203L, 2222L, 2246L,
2286L, 2298L, 2286L, 2294L, 2290L, 2304L, 2296L, 2293L, 2351L,
2328L, 2305L, 2279L, 2284L, 2260L, 2241L, 2222L, 2205L, 2180L,
2182L, 2184L, 2166L, 2169L, 2133L, 2122L, 2100L, 2114L, 2101L,
2075L, 2065L, 2036L, 2006L, 2005L, 1989L, 1974L, 1973L, 1966L,
1959L, 1922L, 1897L, 1872L, 1870L, 1871L, 1872L, 1865L, 1847L,
1816L, 1809L, 1792L, 1774L, 1770L, 1747L, 1704L, 1681L, 1671L,
1659L, 1611L, 1574L, 1566L, 1555L, 1527L, 1480L, 1451L, 1400L,
1358L, 1341L, 1290L, 1289L, 1254L, 1220L, 1181L, 1142L, 1119L,
1055L, 1004L, 942L, 866L, 787L, 713L)), .Names = c("id", "t",
"viewers"), row.names = c(NA, -157L), class = "data.frame")
答案 0 :(得分:3)
一种做法是按组处理:先取每个分组的前 10 行,再把结果重塑为“宽”格式。使用 dplyr/tidyr 时,可以先用 `slice` 取出每个 'id' 的前 10 行,再用 `spread`(新版 tidyr 中对应 `pivot_wider`)转换为“宽”格式:
library(dplyr)
library(tidyr)

# Keep the first 10 rows of every id, then pivot to wide format:
# one row per id and one column per value of t, filled with viewers.
# pivot_wider() is the current tidyr replacement for the superseded spread().
d %>%
  group_by(id) %>%
  slice(seq_len(10)) %>%
  ungroup() %>%  # drop the grouping so the result is a plain tibble
  pivot_wider(names_from = t, values_from = viewers)
答案 1 :(得分:2)
首先对 `d` 取子集得到 `d10`,然后对 `d10` 执行 `reshape` 命令。不需要使用任何额外的包。
如果我们知道每个 id 的前 10 行恰好是 t = 0, 1, 2, ..., 9(示例数据中正是如此),那么:
# Keep only the rows whose t is below 10; which() drops NA comparisons.
d10 <- d[which(d[["t"]] < 10), , drop = FALSE]
或者如果我们不能做出这样的假设:
# Split d by id, take the first 10 rows of each piece, and stack the pieces.
d10 <- do.call(rbind, lapply(split(d, d$id), head, n = 10))
或:
# Number the rows within each id (1, 2, 3, ...) and keep those numbered <= 10.
# local() keeps the helper vector out of the calling environment.
d10 <- local({
  rank_in_id <- ave(d[["t"]], d[["id"]], FUN = seq_along)
  d[which(rank_in_id <= 10), , drop = FALSE]
})