我有一个如下所示的数据集:
structure(list(A = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25"), class = "factor"), T = c(0.04, 0.08, 0.12, 0.16, 0.2,
0.24), X = c(464.4, 464.4, 464.4, 464.4, 464.4, 464.4), Y = c(418.5,
418.5, 418.5, 418.5, 418.5, 418.5), V = c(0, 0, 0, 0, 0, 0),
GD = c(0, 0, 0, 0, 0, 0), ND = c(NA, 0, 0, 0, 0, 0), ND2 = c(NA,
0, 0, 0, 0, 0), TID = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("t1",
"t10", "t11", "t12", "t13", "t14", "t15", "t16", "t17", "t18",
"t19", "t2", "t20", "t21", "t22", "t23", "t24", "t25", "t3",
"t4", "t5", "t6", "t7", "t8", "t9"), class = "factor")), .Names = c("A",
"T", "X", "Y", "V", "GD", "ND", "ND2", "TID"), row.names = c(NA,
6L), class = "data.frame")
我想为每个 TID 选取前 80 行观测值(保留所有变量)。到目前为止,我只能用下面的代码处理第一个 TID:
sub.data1<-NM[1:80, ]
我如何为所有其他TID执行此操作?
谢谢!
答案 0 :(得分:7)
我会这样做(其中 dat 是你的数据框,即问题中的 NM):
lapply(split(dat, dat$TID), head, 80)
它返回一个列表,其中每个元素都是一个最多包含 80 行的 data.frame(每个 TID 对应一个)。如果你想把所有结果合并到一个 data.frame 中:
do.call(rbind, lapply(split(dat, dat$TID), head, 80))
答案 1 :(得分:5)
使用 plyr 包中的 ddply() 函数,您可以按 TID 拆分数据,用 head() 选取每组的前 80 行,然后将结果合并到一个数据框中:
library(plyr)
ddply(NM, .(TID), head, n = 80)
答案 2 :(得分:3)
使用 data.table 包:我做了一个较短的例子,其中只有 t1 和 t2 两个 TID,并返回每个 TID 的前两行。您可以根据自己的数据进行调整(例如把 2 改为 80)。
library(data.table)
data<-structure(list(A = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25"), class = "factor"), T = c(0.04, 0.08, 0.12, 0.16, 0.2,
0.24), X = c(464.4, 464.4, 464.4, 464.4, 464.4, 464.4), Y = c(418.5,
418.5, 418.5, 418.5, 418.5, 418.5), V = c(0, 0, 0, 0, 0, 0),
GD = c(0, 0, 0, 0, 0, 0), ND = c(NA, 0, 0, 0, 0, 0), ND2 = c(NA,
0, 0, 0, 0, 0), TID = c("t1","t1","t1","t2","t2","t2")), .Names = c("A",
"T", "X", "Y", "V", "GD", "ND", "ND2", "TID"), row.names = c(NA,
6L), class = "data.frame")
dt<-data.table(data)
dt[,head(.SD,2),by=TID]
结果如下:
TID A T X Y V GD ND ND2
1: t1 1 0.04 464.4 418.5 0 0 NA NA
2: t1 1 0.08 464.4 418.5 0 0 0 0
3: t2 1 0.16 464.4 418.5 0 0 0 0
4: t2 1 0.20 464.4 418.5 0 0 0 0
如果需要,可以将最后一行改为如下形式,把结果转换回数据框:
as.data.frame(dt[,head(.SD,2),by=TID])
答案 3 :(得分:2)
这是另一个使用 base R 的解决方案:
do.call(rbind, by(NM, NM$TID, head, 80))