Question

我有一些传感器数据，每秒有100条记录。最后一列是毫秒数，目前每行都是10。如何按日期和时间分组，对毫秒数逐行累加？

# Sample sensor data: 200 readings taken on a single date, 100 readings for
# each of two consecutive seconds, every reading carrying ms = 10.
# Built with rep() instead of a literal dput() dump; the resulting object has
# the same columns, row names and class attributes.
testdata <- structure(
  list(
    local_date = rep("26-06-2017", 200L),
    local_time = rep(c("13:58:23", "13:58:24"), each = 100L),
    ms = rep(10, 200L)
  ),
  .Names = c("local_date", "local_time", "ms"),
  row.names = c(NA, -200L),
  class = c("data.table", "data.frame")
)

前100行的时间（13:58:23）和日期（26-06-2017）都相同，并且每行的毫秒数都是10。期望的结果是：每秒内只有第一条记录为10毫秒，其后每条记录的毫秒数依次累加到前面的毫秒数之上（10、20、30……1000）。

下面这段代码可以生成期望的结果：

# Illustration of the desired result: within each of the two seconds the ms
# column counts 10, 20, ..., 1000. This overwrites the existing ms column.
testdata$ms <- rep(seq(10, 1000, by = 10), times = 2)

但是由于原始数据不是那么干净，所以我必须按日期和时间对数据进行分组，然后以行方式将毫秒相加。

我更喜欢data.table解决方案，但是dplyr也可以正常工作。

Answer 1

听起来您需要分组的cumsum：

library(dplyr)

# ms2 holds the idealised counter (10, 20, ..., 1000 repeated twice) so it can
# be compared side by side with the grouped cumulative sum computed below.
testdata$ms2 <- rep(seq(from = 10, to = 1000, by = 10), 2)

# Running total of ms within each (local_date, local_time) group.
# ungroup() drops the grouping afterwards so later verbs applied to the result
# do not silently keep operating per group.
testdata %>%
  group_by(local_date, local_time) %>%
  mutate(cumsum_ms = cumsum(ms)) %>%
  ungroup()

   local_date local_time    ms   ms2 cumsum_ms
   <chr>      <chr>      <dbl> <dbl>     <dbl>
 1 26-06-2017 13:58:23      10    10        10
 2 26-06-2017 13:58:23      10    20        20
 3 26-06-2017 13:58:23      10    30        30
 4 26-06-2017 13:58:23      10    40        40
 5 26-06-2017 13:58:23      10    50        50

Answer 2

并添加一个data.table版本：

# Replace ms by its running total within each (local_time, local_date) group.
# `:=` updates the column in place, so running this line a second time would
# accumulate the already-accumulated values.
testdata[
  ,
  ms := cumsum(ms),
  by = list(local_time, local_date)
]

如何在分组的列中按行添加值

2 个答案: