我有一个数据框 df,如下所示:
# Show the large imei / unixTime values in full instead of scientific notation.
options(scipen = 999)

# Example data: twelve rows for a single imei, unixTime increasing in steps of
# 60000, and ign switching Off (3 rows) -> On (2) -> Off (2) -> On (5).
df <- data.frame(
  imei = rep(35745407328, times = 12),
  ign = rep(c("Off", "On", "Off", "On"), times = c(3, 2, 2, 5)),
  unixTime = seq(from = 1514313014000, by = 60000, length.out = 12)
)
DF
----------------------------------
imei ign unixTime
----------------------------------
35745407328 Off 1514313014000
----------------------------------
35745407328 Off 1514313074000
----------------------------------
35745407328 Off 1514313134000
----------------------------------
35745407328 On 1514313194000
----------------------------------
35745407328 On 1514313254000
----------------------------------
35745407328 Off 1514313314000
----------------------------------
35745407328 Off 1514313374000
----------------------------------
35745407328 On 1514313434000
----------------------------------
35745407328 On 1514313494000
----------------------------------
35745407328 On 1514313554000
----------------------------------
35745407328 On 1514313614000
----------------------------------
35745407328 On 1514313674000
----------------------------------
我想根据 'ign' 对上述数据进行分组(连续相同的值为一组),然后计算每组 unixTime 的平均值和标准差,
以及每组 unixTime 的第一个值与最后一个值之差。
用于计算平均值、标准差(sd)和差值(diff)的分组如下:
----------------------------------
imei ign unixTime
----------------------------------
35745407328 Off 1514313014000
----------------------------------
35745407328 Off 1514313074000
----------------------------------
35745407328 Off 1514313134000
----------------------------------
----------------------------------
35745407328 On 1514313194000
----------------------------------
35745407328 On 1514313254000
----------------------------------
----------------------------------
35745407328 Off 1514313314000
----------------------------------
35745407328 Off 1514313374000
----------------------------------
----------------------------------
35745407328 On 1514313434000
----------------------------------
35745407328 On 1514313494000
----------------------------------
35745407328 On 1514313554000
----------------------------------
35745407328 On 1514313614000
----------------------------------
35745407328 On 1514313674000
----------------------------------
请帮我解决这个问题
如果已经有人回答过这个问题,请给我链接。谢谢。
答案 0 :(得分:1)
使用data.table的解决方案。
library(data.table)

# Convert df to a data.table in place so columns can be added by reference.
setDT(df)

# Step 1: label each run of consecutive identical ign values (1, 2, 3, ...).
df[, Group := rleid(ign)]

# Step 2: per-run mean, standard deviation and first-minus-last difference of
# unixTime, written onto every row of the run.
df[, `:=`(
  Mean = mean(unixTime),
  SD = sd(unixTime),
  Diff = first(unixTime) - last(unixTime)
), by = Group]

# `:=` updates by reference, so df2 is the same table as df, not a copy.
df2 <- df
df2[]
# imei ign unixTime Group Mean SD Diff
# 1: 35745407328 Off 1514313014000 1 1514313074000 60000.00 -120000
# 2: 35745407328 Off 1514313074000 1 1514313074000 60000.00 -120000
# 3: 35745407328 Off 1514313134000 1 1514313074000 60000.00 -120000
# 4: 35745407328 On 1514313194000 2 1514313224000 42426.41 -60000
# 5: 35745407328 On 1514313254000 2 1514313224000 42426.41 -60000
# 6: 35745407328 Off 1514313314000 3 1514313344000 42426.41 -60000
# 7: 35745407328 Off 1514313374000 3 1514313344000 42426.41 -60000
# 8: 35745407328 On 1514313434000 4 1514313554000 94868.33 -240000
# 9: 35745407328 On 1514313494000 4 1514313554000 94868.33 -240000
# 10: 35745407328 On 1514313554000 4 1514313554000 94868.33 -240000
# 11: 35745407328 On 1514313614000 4 1514313554000 94868.33 -240000
# 12: 35745407328 On 1514313674000 4 1514313554000 94868.33 -240000
使用dplyr的解决方案(其中rleid仍来自data.table)。
library(dplyr)
library(data.table)

# rleid() (from data.table) labels each run of consecutive identical ign
# values; the per-run statistics of unixTime are then attached to every row of
# that run, and the grouping is dropped again at the end.
df2 <- df %>%
  mutate(Group = rleid(ign)) %>%
  group_by(Group) %>%
  mutate(
    Mean = mean(unixTime),
    SD = sd(unixTime),
    Diff = first(unixTime) - last(unixTime)
  ) %>%
  ungroup()

df2
# imei ign unixTime Group Mean SD Diff
# <dbl> <fctr> <dbl> <int> <dbl> <dbl> <dbl>
# 1 35745407328 Off 1514313014000 1 1514313074000 60000.00 -120000
# 2 35745407328 Off 1514313074000 1 1514313074000 60000.00 -120000
# 3 35745407328 Off 1514313134000 1 1514313074000 60000.00 -120000
# 4 35745407328 On 1514313194000 2 1514313224000 42426.41 -60000
# 5 35745407328 On 1514313254000 2 1514313224000 42426.41 -60000
# 6 35745407328 Off 1514313314000 3 1514313344000 42426.41 -60000
# 7 35745407328 Off 1514313374000 3 1514313344000 42426.41 -60000
# 8 35745407328 On 1514313434000 4 1514313554000 94868.33 -240000
# 9 35745407328 On 1514313494000 4 1514313554000 94868.33 -240000
# 10 35745407328 On 1514313554000 4 1514313554000 94868.33 -240000
# 11 35745407328 On 1514313614000 4 1514313554000 94868.33 -240000
# 12 35745407328 On 1514313674000 4 1514313554000 94868.33 -240000