如何计算R中图表的增量和减量?

时间:2017-03-14 16:33:20

标签: r ggplot2 statistics time-series

我在r中有一个dataframe,每5分钟就有一个dataValue,就像油箱中的升数一样:

id sensorValue timeStamp delta
1   586   08-11-16 23:39   0
2   595   08-11-16 23:44   9
3   586   08-11-16 23:49   -9
4   586   08-11-16 23:55   0
5   586   08-11-16 23:59   0
6   576   09-11-16 00:04   -10
7   595   09-11-16 00:09   19
8   586   09-11-16 00:14   -9
9   586   09-11-16 00:19   0

这样的记录一直持续了2个月。现在,如果我对数据做平滑处理(考虑到气体膨胀量约为罐容量960升的2%),并用ggplot按时间绘制sensorValue,就会得到这样的图表:

enter image description here

问题是,当我尝试统计图表中的减量和增量时,每一段向下的线应计为一次减量,每一段向上的线应计为一次增量。但请注意,数据框中的每一行只代表一次5分钟的记录,而图中的每次增量或减量都由多行组成。我想知道是否有办法统计油箱被加注的次数,以及油箱中的气体被使用的次数,同时忽略因热胀冷缩引起的正常体积波动。

这是dput的输出(df [1:50,]):

structure(list(sensorValue = c(586, 595, 586, 586, 586, 576, 
595, 586, 586, 576, 586, 576, 576, 586, 586, 586, 586, 586, 595, 
586, 586, 586, 586, 576, 586, 586, 586, 595, 586, 576, 576, 586, 
586, 586, 595, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 
586, 595, 586, 586, 586), TimeStamp = structure(c(1478669973, 
1478670292, 1478670583, 1478670901, 1478671193, 1478671482, 1478671773, 
1478672092, 1478672383, 1478672673, 1478672993, 1478673283, 1478673575, 
1478673894, 1478674185, 1478674474, 1478674794, 1478675084, 1478675375, 
1478675694, 1478675985, 1478676274, 1478676594, 1478676884, 1478677175, 
1478677494, 1478677785, 1478678075, 1478678395, 1478678684, 1478678977, 
1478679295, 1478679587, 1478679876, 1478680196, 1478680486, 1478680777, 
1478681095, 1478681386, 1478681676, 1478681996, 1478682286, 1478682577, 
1478682895, 1478683186, 1478683476, 1478683796, 1478684086, 1478684377, 
1478684695), class = c("POSIXct", "POSIXt"), tzone = ""), capacidad = c(961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961), delta = c(0, 
9, -9, 0, 0, -10, 19, -9, 0, -10, 10, -10, 0, 10, 0, 0, 0, 0, 
9, -9, 0, 0, 0, -10, 10, 0, 0, 9, -9, -10, 0, 10, 0, 0, 9, -9, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0), smoothValue = c(586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 576, 576, 576, 586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 586)), .Names = c("sensorValue", 
"TimeStamp", "capacidad", "delta", "smoothValue"), row.names = c(NA, 
50L), class = "data.frame")

输出dput(df [660:720,])(注意:紧接在下面的第一个structure与上文df [1:50,]的输出完全相同;df [660:720,]的实际输出位于其后的 `> dput(df[660:720,])` 一行之后):

structure(list(sensorValue = c(586, 595, 586, 586, 586, 576, 
595, 586, 586, 576, 586, 576, 576, 586, 586, 586, 586, 586, 595, 
586, 586, 586, 586, 576, 586, 586, 586, 595, 586, 576, 576, 586, 
586, 586, 595, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 
586, 595, 586, 586, 586), TimeStamp = structure(c(1478669973, 
1478670292, 1478670583, 1478670901, 1478671193, 1478671482, 1478671773, 
1478672092, 1478672383, 1478672673, 1478672993, 1478673283, 1478673575, 
1478673894, 1478674185, 1478674474, 1478674794, 1478675084, 1478675375, 
1478675694, 1478675985, 1478676274, 1478676594, 1478676884, 1478677175, 
1478677494, 1478677785, 1478678075, 1478678395, 1478678684, 1478678977, 
1478679295, 1478679587, 1478679876, 1478680196, 1478680486, 1478680777, 
1478681095, 1478681386, 1478681676, 1478681996, 1478682286, 1478682577, 
1478682895, 1478683186, 1478683476, 1478683796, 1478684086, 1478684377, 
1478684695), class = c("POSIXct", "POSIXt"), tzone = ""), capacidad = c(961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961), delta = c(0, 
9, -9, 0, 0, -10, 19, -9, 0, -10, 10, -10, 0, 10, 0, 0, 0, 0, 
9, -9, 0, 0, 0, -10, 10, 0, 0, 9, -9, -10, 0, 10, 0, 0, 9, -9, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -9, 0, 0), smoothValue = c(586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 576, 576, 576, 586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 586, 
586, 586, 586, 586, 586, 586, 586, 586, 586, 586)), .Names = c("sensorValue", 
"TimeStamp", "capacidad", "delta", "smoothValue"), row.names = c(NA, 
50L), class = "data.frame")
> dput(df[660:720,])
structure(list(sensorValue = c(432, 442, 442, 442, 442, 442, 
442, 442, 442, 442, 442, 442, 442, 442, 442, 442, 432, 442, 490, 
922, 912, 922, 922, 932, 912, 922, 922, 922, 922, 922, 922, 922, 
922, 922, 932, 912, 912, 922, 922, 912, 922, 912, 922, 912, 922, 
912, 922, 922, 922, 912, 912, 912, 922, 912, 922, 922, 922, 922, 
903, 912, 912), TimeStamp = structure(c(1478867679, 1478868000, 
1478868291, 1478868582, 1478868874, 1478869195, 1478869485, 1478869777, 
1478870097, 1478870389, 1478870679, 1478871000, 1478871291, 1478871582, 
1478871874, 1478872195, 1478872485, 1478872777, 1478873097, 1478873389, 
1478873679, 1478874000, 1478874291, 1478874583, 1478874874, 1478875195, 
1478875485, 1478875777, 1478876097, 1478876389, 1478876679, 1478877000, 
1478877291, 1478877583, 1478877874, 1478878195, 1478878485, 1478878777, 
1478879097, 1478879389, 1478879680, 1478880000, 1478880291, 1478880583, 
1478880874, 1478881195, 1478881485, 1478881777, 1478882097, 1478882389, 
1478882680, 1478883001, 1478883291, 1478883583, 1478883874, 1478884195, 
1478884485, 1478884777, 1478885097, 1478885389, 1478885680), class = c("POSIXct", 
"POSIXt"), tzone = ""), capacidad = c(961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 961, 
961, 961, 961, 961), delta = c(-10, 10, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, -10, 10, 48, 432, -10, 10, 0, 10, -20, 10, 
0, 0, 0, 0, 0, 0, 0, 0, 10, -20, 0, 10, 0, -10, 10, -10, 10, 
-10, 10, -10, 10, 0, 0, -10, 0, 0, 10, -10, 10, 0, 0, 0, -19, 
9, 0), smoothValue = c(442, 442, 442, 442, 442, 442, 442, 442, 
442, 442, 442, 442, 442, 442, 442, 442, 442, 442, 490, 912, 922, 
922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 
922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 922, 
922, 922, 912, 912, 912, 912, 922, 922, 922, 922, 922, 912, 912, 
912)), .Names = c("sensorValue", "TimeStamp", "capacidad", "delta", 
"smoothValue"), row.names = 660:720, class = "data.frame")

1 个答案:

答案 0 :(得分:1)

根据您的问题,看起来您想要统计液位下降或上升的连续区段(run)的数量。我们可以使用`diff`和`sign`得到一个向量:液位下降时取值为-1,液位上升时取值为1。然后我们分别将这两个值重新编码为`Falling`和`Rising`。最后,为了统计`Falling`或`Rising`区段的数量,我们为每一段连续的`Falling`或`Rising`值创建一个单独的组。

library(dplyr)

# Simulated readings: 100 steps that are usually -1 and occasionally +10
# (weights 10:1), accumulated from a base level of 500 and sampled every
# 5 time units.
set.seed(113)
dat <- data.frame(
  sensorValue = 500 + cumsum(
    sample(x = c(-1, 10), size = 100, replace = TRUE, prob = c(10, 1))
  ),
  timeStamp = seq(from = 0, to = 495, by = 5)
)

这是假数据的样子:

library(ggplot2)
# Make the classic theme the default for every plot drawn afterwards
theme_set(theme_classic())

# Line chart of the simulated sensor readings against time
ggplot(data = dat, mapping = aes(x = timeStamp, y = sensorValue)) +
  geom_line()

enter image description here

# Direction of each step between consecutive readings:
# -1 = level fell, 0 = level unchanged, 1 = level rose
runs <- sign(diff(dat$sensorValue))

# Readable label for each step. "0" is mapped explicitly so that a flat step
# is labelled "Stable" rather than being converted to NA by recode().
slope <- recode(runs, "-1" = "Falling", "0" = "Stable", "1" = "Rising")

# Run id: starts at 0 and increases by one whenever the direction changes,
# so all consecutive steps with the same direction share one id
groups <- c(0, cumsum(diff(runs) != 0))

run.data <- data.frame(runs, slope, groups)

# Keep a single row per run, then count how many runs there are per direction
run.data %>%
  group_by(groups) %>%
  slice(1) %>%
  group_by(slope) %>%
  tally()
    slope     n
1 Falling    11
2  Rising    10

更新:根据您的评论,我们需要进行一些过滤以消除数据中的小起落。您可以使用低通滤波器来消除高频噪声。但在这种情况下,也许一种更简单的方法可行。

在下面的代码中,我们计算每个连续测量之间的差异,就像之前一样,但如果差异小于25,我们将差值设置为零。您可以将此截止值调整为最适合摆脱小噪音跳跃的值,而不会消除您感兴趣的较大动作。

首先,我合并了您发布的两个数据样本,并添加了一个新的time2列,用于消除两个样本之间的时间差距,仅供参考。

# Stack the two data samples into one data frame.
# NOTE(review): dat and dat2 are expected to hold the two dput() samples
# posted in the question; their assignment is not shown in this snippet.
dat <- rbind(dat, dat2)

# Put both data samples on a continuous, gap-free time axis with one step of
# 5 per reading (the readings are taken roughly every 5 minutes).
# length.out keeps this valid even when dat has zero rows.
dat$time2 <- seq(0, by = 5, length.out = nrow(dat))

现在我们运行与之前相同的代码,但截止值为25,低于此值时我们将delta设置为零。

# Changes smaller than this (same units as sensorValue) are treated as noise.
# Tune it so small jitter is removed but the large moves of interest survive.
noise_cutoff <- 25

# Difference between consecutive readings, computed once
level_change <- diff(dat$sensorValue)

# Direction of each step after zeroing out the sub-cutoff jumps:
# -1 = large fall, 0 = stable (or noise), 1 = large rise
runs <- sign(ifelse(abs(level_change) < noise_cutoff, 0, level_change))
slope <- recode(runs, "-1" = "Falling", "0" = "Stable", "1" = "Rising")

# Run id: increases by one whenever the direction changes
groups <- c(0, cumsum(diff(runs) != 0))

run.data <- data.frame(runs, slope, groups)

# Keep a single row per run, then count how many runs there are per direction
run.data %>%
  group_by(groups) %>%
  slice(1) %>%
  group_by(slope) %>%
  tally()
    slope     n
   <fctr> <int>
1 Falling     1
2  Rising     1
3  Stable     3

一次大跌和一次大涨似乎与数据样本一致:

# Sensor readings plotted against the time2 column, drawn with a size-1 line
ggplot(data = dat, mapping = aes(x = time2, y = sensorValue)) +
  geom_line(size = 1)

enter image description here