按分组聚合计算移动方差

时间:2018-05-18 02:06:38

标签: r time-series aggregation zoo

我有采样间隔为 6 分钟的热电偶数据。热电偶安装在不同的高度,每个高度上有多个热电偶,以径向位置(角度)区分。

DT_TI_RECORDED      HEIGHT      POS             TEMPERATURE
2018-05-16 00:00:00     1       90              111
2018-05-16 00:00:00     1       180             112
2018-05-16 00:00:00     1       270             113
2018-05-16 00:00:00     2       90              112
2018-05-16 00:00:00     2       180             114
2018-05-16 00:00:00     2       270             115
2018-05-16 00:00:00     3       90              112
2018-05-16 00:00:00     3       180             112
2018-05-16 00:00:00     3       270             113
...
2018-05-16 00:06:00     1       90              111
2018-05-16 00:06:00     1       180             112
2018-05-16 00:06:00     1       270             113
2018-05-16 00:06:00     2       90              112
2018-05-16 00:06:00     2       180             114
2018-05-16 00:06:00     2       270             112
2018-05-16 00:06:00     3       90              114
2018-05-16 00:06:00     3       180             112
2018-05-16 00:06:00     3       270             114
...

对于每个唯一的高度和位置组合,我想在每个 6 分钟的时间点上计算向后 n 小时(比如 4 小时)的移动方差。

我想要复现的原始代码是用 SAS 统计软件编写的:

    /* Original SAS implementation to be replicated: reads Raw_data and writes
       the data set Moving_Variance, converting TEMPERATURE into
       Moving_4hour_Var with the MOVVAR 40 transform (40-observation moving
       variance). */
    PROC EXPAND DATA=Raw_data
        OUT=Moving_Variance
        ALIGN = BEGINNING
    ;
    /* One separate series per HEIGHT / POS combination. */
    by HEIGHT POS;
    /* DT_TI_RECORDED is the time ID of each observation. */
    ID DT_TI_RECORDED ;
        CONVERT TEMPERATURE = Moving_4hour_Var /  METHOD = none TRANSFORMOUT = (MOVVAR 40); 
    #/* 40 obs at 6min freq = 4hour moving variance*/
    QUIT;

我花了几个小时在谷歌上搜索。我认为需要使用名为 zoo 的 R 包,需要的函数是 rollapply,但我无法弄清楚如何把 aggregate 与 rollapply 结合起来使用。

我试过了

# Attempt from the question (reported below as not working).
# NOTE(review): with %>%, Raw_data is piped in as the first argument, so the
# next line evaluates aggregate(Raw_data, HEIGHT, POS): no grouping list or
# formula and no FUN are supplied, and HEIGHT / POS are bare column names
# rather than objects in scope.
moving_var <- Raw_data %>%
              aggregate(HEIGHT,POS) %>%
              # FUN = sd yields a rolling standard deviation, not a variance.
              rollapply( TEMPERATURE, width = 40, FUN = sd, fill = NA)

但是不起作用。我刚接触 R 编程,这让我快要抓狂了。

1 个答案:

答案 0 :(得分:1)

请尝试按如下方式使用 aggregate:

library(zoo)

# For every pos/height group, cut the temperature series into consecutive
# blocks of 40 observations and compute the variance of each block.
result <- aggregate(
  temp ~ pos + height,
  data = df,
  FUN = function(group_temps) {
    rollapply(group_temps, width = 40, by = 40, FUN = var)
  }
)

width 是滚动窗口的宽度,而 by 是相邻两个窗口起点之间相隔的点数。这里 width = 40、by = 40,因此每个窗口包含 40 个观测值,并且每个窗口都紧接在前一个窗口结束之后开始(窗口互不重叠)。如果希望每个时间点都得到一个方差,可以把 by 设为 1(这也是默认值)。

结果数据框中每个窗口对应一列,这种结构可以看作“宽”格式。如果想把它转换为“长”格式,请使用 tidyr 中的 gather 或 reshape2 中的 melt。

示例:

# Example data: 160 temperature readings (1..160). The four radial positions
# cycle fastest, then the four heights, which gives 16 pos/height series of
# 10 readings each.
df <- data.frame(
  pos = rep(c(0, 90, 180, 270), times = 40),
  height = rep(rep(1:4, each = 4), times = 10),
  temp = 1:160
)

> head(df,20)
   pos height temp
1    0      1    1
2   90      1    2
3  180      1    3
4  270      1    4
5    0      2    5
6   90      2    6
7  180      2    7
8  270      2    8
9    0      3    9
10  90      3   10
11 180      3   11
12 270      3   12
13   0      4   13
14  90      4   14
15 180      4   15
16 270      4   16
17   0      1   17
18  90      1   18
19 180      1   19
20 270      1   20


library(zoo)

# Same aggregation on the example data, using non-overlapping windows of
# 3 observations per pos/height group.
result <- aggregate(
  temp ~ pos + height,
  data = df,
  FUN = function(group_temps) {
    rollapply(group_temps, width = 3, by = 3, FUN = var)
  }
)

将得到如下结果:

   pos height temp.1 temp.2 temp.3
1    0      1    256    256    256
2   90      1    256    256    256
3  180      1    256    256    256
4  270      1    256    256    256
5    0      2    256    256    256
6   90      2    256    256    256
7  180      2    256    256    256
8  270      2    256    256    256
9    0      3    256    256    256
10  90      3    256    256    256
11 180      3    256    256    256
12 270      3    256    256    256
13   0      4    256    256    256
14  90      4    256    256    256
15 180      4    256    256    256
16 270      4    256    256    256