我有一个这样的数据框:
starttime sx sy time
<chr> <chr> <chr> <chr>
1 1416924247145 667.75 824.25 1416924247145
2 1416924247145 667.875 824.25 1416924247158
3 1416924247145 668.5 824.5 1416924247198
4 1416924257557 231.25 602.25 1416924257557
5 1416924257557 230.625 602.25 1416924257570
6 1416924257557 229.625 601.875 1416924257597
7 1416924257557 228.75 601.25 1416924257610
8 1416924257557 227.5 600.0 1416924257623
9 1416924257557 216.875 587.75 1416924257717
10 1416924257557 207.125 572.625 1416924257797
11 1416924257600 525.425 525.636 1416924259999
我希望得到该数据框的一个子集,只保留每组相同 starttime 中的第一行和最后一行。在这个例子中,这些行是 1、3、4、10 和 11。重要的是,数据框的第一行和最后一行也要包含在内。我尝试使用 dplyr 包来实现,因为它看起来很适合这类操作。我使用了 group_by()、filter()、first() 和 last() 函数,但无法得到想要的结果。期望的结果如下:
starttime sx sy time
<chr> <chr> <chr> <chr>
1 1416924247145 667.75 824.25 1416924247145
3 1416924247145 668.5 824.5 1416924247198
4 1416924257557 231.25 602.25 1416924257557
10 1416924257557 207.125 572.625 1416924257797
11 1416924257600 525.425 525.636 1416924259999
答案 0 :(得分:2)
使用 dplyr 执行此操作的方法之一:
library(dplyr)
# Within each starttime group, keep the row at position 1 and the row at
# position n(). unique() collapses c(1, n()) to a single index when a group
# has only one row, so that row is selected once rather than twice.
df %>%
  group_by(starttime) %>%
  slice(unique(c(1, n())))
#Source: local data frame [5 x 4]
#Groups: starttime [3]
#
# starttime sx sy time
# <dbl> <dbl> <dbl> <dbl>
#1 1.416924e+12 667.750 824.250 1.416924e+12
#2 1.416924e+12 668.500 824.500 1.416924e+12
#3 1.416924e+12 231.250 602.250 1.416924e+12
#4 1.416924e+12 207.125 572.625 1.416924e+12
#5 1.416924e+12 525.425 525.636 1.416924e+12
或使用 data.table:
library(data.table)
# setDT() converts df to a data.table in place (by reference).
setDT(df)
# For each starttime group, index the group's subset (.SD) at its first row and
# its last row (.N); unique() avoids selecting a one-row group twice.
df[, .SD[unique(c(1, .N))], by = starttime]
数据:
# Sample data in dput() form: 11 rows, 4 numeric columns. The snippets in this
# document refer to this data frame as `df`; assign the result to `df` to
# reproduce them.
structure(
  list(
    starttime = rep(
      c(1416924247145, 1416924257557, 1416924257600),
      times = c(3, 7, 1)
    ),
    sx = c(
      667.75, 667.875, 668.5, 231.25, 230.625, 229.625,
      228.75, 227.5, 216.875, 207.125, 525.425
    ),
    sy = c(
      824.25, 824.25, 824.5, 602.25, 602.25, 601.875,
      601.25, 600, 587.75, 572.625, 525.636
    ),
    time = c(
      1416924247145, 1416924247158, 1416924247198, 1416924257557,
      1416924257570, 1416924257597, 1416924257610, 1416924257623,
      1416924257717, 1416924257797, 1416924259999
    )
  ),
  names = c("starttime", "sx", "sy", "time"),
  class = "data.frame",
  row.names = as.character(1:11)
)
答案 1 :(得分:0)
我们可以使用 base R:
# Keep the first and last row of every distinct starttime value.
# A row is kept when it is the first occurrence of its starttime (scanning from
# the top) or the last occurrence (scanning from the bottom, fromLast = TRUE).
# A starttime that occurs only once satisfies both tests and is kept once.
# duplicated() returns a logical mask directly, so the mask is never coerced
# through starttime's own type, and NA starttimes do not become NA row indices.
i1 <- with(
  df,
  !duplicated(starttime) | !duplicated(starttime, fromLast = TRUE)
)
df[i1, ]
# starttime sx sy time
#1 1416924247145 667.750 824.250 1416924247145
#3 1416924247145 668.500 824.500 1416924247198
#4 1416924257557 231.250 602.250 1416924257557
#10 1416924257557 207.125 572.625 1416924257797
#11 1416924257600 525.425 525.636 1416924259999