我在 R 中有一个数据框(data frame),每 5 分钟记录一次传感器读数(sensorValue),表示储罐中的升数:
id sensorValue timeStamp delta
1 586 08-11-16 23:39 0
2 595 08-11-16 23:44 9
3 586 08-11-16 23:49 -9
4 586 08-11-16 23:55 0
5 586 08-11-16 23:59 0
6 576 09-11-16 00:04 -10
7 595 09-11-16 00:09 19
8 586 09-11-16 00:14 -9
9 586 09-11-16 00:19 0
这样的行一直持续了 2 个月。现在,如果我对数据做平滑处理(考虑到气体膨胀量约为储罐容量 960 升的 2%),并用 ggplot 按时间绘制 sensorValue,就会得到这样的图表:
问题在于,我想统计图中下降和上升的次数:每一段向下的线应计为一次减少,每一段向上的线应计为一次增加。但请注意,数据框中的每一行只代表一次 5 分钟的记录,而每次增加或减少都由多行组成。我想知道有没有办法统计储罐被加注的次数,以及罐内气体被使用的次数,同时忽略由热胀冷缩引起的正常体积波动。
这是dput的输出(df [1:50,]):
structure(list(sensorValue = c(586, 595, 586, 586, 586, 576,
595, 586, 586, 576, 586, 576, 576, 586, 586, 586, 586, 586, 595,
586, 586, 586, 586, 576, 586, 586, 586, 595, 586, 576, 576, 586,
586, 586, 595, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586,
586, 595, 586, 586, 586), TimeStamp = structure(c(1478669973,
1478670292, 1478670583, 1478670901, 1478671193, 1478671482, 1478671773,
1478672092, 1478672383, 1478672673, 1478672993, 1478673283, 1478673575,
1478673894, 1478674185, 1478674474, 1478674794, 1478675084, 1478675375,
1478675694, 1478675985, 1478676274, 1478676594, 1478676884, 1478677175,
1478677494, 1478677785, 1478678075, 1478678395, 1478678684, 1478678977,
1478679295, 1478679587, 1478679876, 1478680196, 1478680486, 1478680777,
1478681095, 1478681386, 1478681676, 1478681996, 1478682286, 1478682577,
1478682895, 1478683186, 1478683476, 1478683796, 1478684086, 1478684377,
1478684695), class = c("POSIXct", "POSIXt"), tzone = ""), capacidad = c(961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961), delta = c(0,
9, -9, 0, 0, -10, 19, -9, 0, -10, 10, -10, 0, 10, 0, 0, 0, 0,
9, -9, 0, 0, 0, -10, 10, 0, 0, 9, -9, -10, 0, 10, 0, 0, 9, -9,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0), smoothValue = c(586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 576, 576, 576, 586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 586)), .Names = c("sensorValue",
"TimeStamp", "capacidad", "delta", "smoothValue"), row.names = c(NA,
50L), class = "data.frame")
输出dput(df [660:720,]):
structure(list(sensorValue = c(586, 595, 586, 586, 586, 576,
595, 586, 586, 576, 586, 576, 576, 586, 586, 586, 586, 586, 595,
586, 586, 586, 586, 576, 586, 586, 586, 595, 586, 576, 576, 586,
586, 586, 595, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586,
586, 595, 586, 586, 586), TimeStamp = structure(c(1478669973,
1478670292, 1478670583, 1478670901, 1478671193, 1478671482, 1478671773,
1478672092, 1478672383, 1478672673, 1478672993, 1478673283, 1478673575,
1478673894, 1478674185, 1478674474, 1478674794, 1478675084, 1478675375,
1478675694, 1478675985, 1478676274, 1478676594, 1478676884, 1478677175,
1478677494, 1478677785, 1478678075, 1478678395, 1478678684, 1478678977,
1478679295, 1478679587, 1478679876, 1478680196, 1478680486, 1478680777,
1478681095, 1478681386, 1478681676, 1478681996, 1478682286, 1478682577,
1478682895, 1478683186, 1478683476, 1478683796, 1478684086, 1478684377,
1478684695), class = c("POSIXct", "POSIXt"), tzone = ""), capacidad = c(961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961), delta = c(0,
9, -9, 0, 0, -10, 19, -9, 0, -10, 10, -10, 0, 10, 0, 0, 0, 0,
9, -9, 0, 0, 0, -10, 10, 0, 0, 9, -9, -10, 0, 10, 0, 0, 9, -9,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0), smoothValue = c(586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 576, 576, 576, 586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586,
586, 586, 586, 586, 586, 586, 586, 586, 586, 586)), .Names = c("sensorValue",
"TimeStamp", "capacidad", "delta", "smoothValue"), row.names = c(NA,
50L), class = "data.frame")
> dput(df[660:720,])
structure(list(sensorValue = c(432, 442, 442, 442, 442, 442,
442, 442, 442, 442, 442, 442, 442, 442, 442, 442, 432, 442, 490,
922, 912, 922, 922, 932, 912, 922, 922, 922, 922, 922, 922, 922,
922, 922, 932, 912, 912, 922, 922, 912, 922, 912, 922, 912, 922,
912, 922, 922, 922, 912, 912, 912, 922, 912, 922, 922, 922, 922,
903, 912, 912), TimeStamp = structure(c(1478867679, 1478868000,
1478868291, 1478868582, 1478868874, 1478869195, 1478869485, 1478869777,
1478870097, 1478870389, 1478870679, 1478871000, 1478871291, 1478871582,
1478871874, 1478872195, 1478872485, 1478872777, 1478873097, 1478873389,
1478873679, 1478874000, 1478874291, 1478874583, 1478874874, 1478875195,
1478875485, 1478875777, 1478876097, 1478876389, 1478876679, 1478877000,
1478877291, 1478877583, 1478877874, 1478878195, 1478878485, 1478878777,
1478879097, 1478879389, 1478879680, 1478880000, 1478880291, 1478880583,
1478880874, 1478881195, 1478881485, 1478881777, 1478882097, 1478882389,
1478882680, 1478883001, 1478883291, 1478883583, 1478883874, 1478884195,
1478884485, 1478884777, 1478885097, 1478885389, 1478885680), class = c("POSIXct",
"POSIXt"), tzone = ""), capacidad = c(961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961,
961, 961, 961, 961), delta = c(-10, 10, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, -10, 10, 48, 432, -10, 10, 0, 10, -20, 10,
0, 0, 0, 0, 0, 0, 0, 0, 10, -20, 0, 10, 0, -10, 10, -10, 10,
-10, 10, -10, 10, 0, 0, -10, 0, 0, 10, -10, 10, 0, 0, 0, -19,
9, 0), smoothValue = c(442, 442, 442, 442, 442, 442, 442, 442,
442, 442, 442, 442, 442, 442, 442, 442, 442, 442, 490, 912, 922,
922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922,
922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922,
922, 922, 912, 912, 912, 912, 922, 922, 922, 922, 922, 912, 912,
912)), .Names = c("sensorValue", "TimeStamp", "capacidad", "delta",
"smoothValue"), row.names = 660:720, class = "data.frame")
答案 0 :(得分:1)
根据您的问题,看起来您想要统计液位下降或上升的"游程"(run,即连续同方向变化的一段)的数量。我们可以使用 diff 和 sign 得到一个向量:液位下降时其值为 -1,液位上升时其值为 1。然后用 recode 将这两个值分别重新编码为 Falling 和 Rising。最后,为了统计 Falling 或 Rising 的时段数,我们为每一段连续的 Falling 或 Rising 值创建一个单独的分组(groups),再对每个分组只取一行并按 slope 计数。
library(dplyr)
# Simulated sensor readings: a random walk starting near 500 that mostly
# steps down by 1 and occasionally jumps up by 10, sampled on a grid of
# 100 time points spaced 5 apart (0, 5, ..., 495).
set.seed(113)
dat <- data.frame(
  sensorValue = cumsum(
    sample(c(-1, 10), size = 100, replace = TRUE, prob = c(10, 1))
  ) + 500,
  timeStamp = seq(0, 495, 5)
)
这是假数据的样子:
library(ggplot2)
# Make the classic theme the default for the plots that follow
theme_set(theme_classic())
# Line plot of the simulated level against time
ggplot(data = dat, mapping = aes(x = timeStamp, y = sensorValue)) +
  geom_line()
# Direction of each step between consecutive readings:
# -1 when the level went down, 1 when it went up.
runs <- sign(diff(dat[["sensorValue"]]))
# Human-readable label for each direction
slope <- recode(runs, `-1` = "Falling", `1` = "Rising")
# Run id: starts at 0 and increases by one each time the direction changes
groups <- c(0, cumsum(diff(runs) != 0))
run.data <- data.frame(runs, slope, groups)
# Keep a single row per run, then count how many runs carry each label
run.data %>%
  group_by(groups) %>%
  slice(1) %>%
  ungroup() %>%
  count(slope)
    slope  n
1 Falling 11
2  Rising 10
更新:根据您的评论,我们需要进行一些过滤以消除数据中的小起落。您可以使用低通滤波器来消除高频噪声。但在这种情况下,也许一种更简单的方法可行。
在下面的代码中,我们像之前一样计算每两次连续测量之间的差值,但如果差值的绝对值小于 25,就把该差值设为零。您可以调整这个截止值,使其既能滤掉小的噪声跳动,又不会抹去您真正关心的较大变化。
首先,我合并了您发布的两个数据样本(代码中的 dat 和 dat2),并添加了一个新的 time2 列,用来消除两个样本之间的时间间隔;这一列仅用于演示。
# Stack the two data samples into one data frame
dat <- rbind(dat, dat2)
# Evenly spaced 5-minute index (0, 5, 10, ...), one value per row, used
# instead of the real timestamps so the gap between the samples disappears
dat$time2 <- seq(0, (nrow(dat) - 1) * 5, by = 5)
现在我们运行与之前相同的代码,但截止值为25,低于此值时我们将delta设置为零。
# Direction of each step, with a noise cutoff: a step whose absolute size
# is below 25 counts as no change (0); otherwise -1 = down, 1 = up.
runs <- with(dat, {
  step <- diff(sensorValue)
  sign(ifelse(abs(step) < 25, 0, step))
})
# Human-readable label for each direction
slope <- recode(runs, `-1` = "Falling", `0` = "Stable", `1` = "Rising")
# Run id: starts at 0 and increases by one each time the direction changes
groups <- c(0, cumsum(diff(runs) != 0))
run.data <- data.frame(runs, slope, groups)
# Keep a single row per run, then count how many runs carry each label
run.data %>%
  group_by(groups) %>%
  slice(1) %>%
  ungroup() %>%
  count(slope)
    slope     n
   <fctr> <int>
1 Falling     1
2  Rising     1
3  Stable     3
一次大跌和一次大涨似乎与数据样本一致:
# Line plot of the merged samples against the time2 column.
# NOTE: ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept so the snippet still runs on older versions.
ggplot(data = dat, mapping = aes(x = time2, y = sensorValue)) +
  geom_line(size = 1)