确定时间序列数据中的高计数周期

时间:2014-11-30 13:34:26

标签: r

使用示例数据框:

# Example data: 38 activity counts stamped every 10 seconds from 13:00:00
# to 13:06:00. The 13:04:30 stamp appears twice (rows 28 and 29), which is
# why there are 38 rows but only 37 distinct time stamps.
count.bouts <- local({
  # The 37 distinct stamps, in chronological order; these are the factor
  # levels of the time.stamp column.
  stamp.levels <- sprintf(
    "13:%02d:%02d",
    rep(0:6, each = 6),
    rep(seq(0L, 50L, by = 10L), times = 7)
  )[1:37]

  # Position of each row's stamp within stamp.levels (28 is repeated).
  stamp.codes <- c(1:28, 28:37)

  data.frame(
    time.stamp = factor(stamp.levels[stamp.codes], levels = stamp.levels),
    count = c(
      5L, 11L, 16L, 19L, 15L, 11L, 8L, 5L, 2L, 6L,
      12L, 15L, 20L, 12L, 6L, 2L, 18L, 25L, 26L, 15L,
      13L, 6L, 5L, 4L, 8L, 9L, 16L, 26L, 29L, 55L,
      21L, 6L, 9L, 28L, 16L, 19L, 26L, 5L
    )
  )
})

我希望创建一个函数,用于识别符合以下条件的高计数活动:

  1. 计数值大于或等于10,且持续1分钟或更长时间
  2. 在这样的高计数期间(即一个“回合”)内,允许计数值降至10以下,但最多持续20秒
  3. 对于满足此条件的数据,我希望通过在数据框中添加一个额外的列(称为“1min + .bouts”)将其在数据集中标示出来。每个回合用从1开始的编号来标识——即上面的数据框中,第一个回合(13:01:40至13:03:20)对应的行全部为1,第二个回合(13:04:20至13:05:50)对应的行全部为2,不属于任何回合的行则填0。

    我希望我说清楚了。如果有人能就哪些包或函数可以帮到我给我指明方向,我将非常感激。

1 个答案:

答案 0 :(得分:3)

这假设没有NA值:

# Label bouts of sustained high counts in count.bouts. All thresholds below
# are expressed in numbers of samples; with the 10-second stamps of the
# example data, 6 samples span one minute and 2 samples span 20 seconds.
library(zoo)

# TRUE where the count reaches 10, NA everywhere else, so that na.approx()
# treats the low-count samples as gaps it may interpolate over.
tmp <- ifelse(count.bouts$count >= 10, TRUE, NA)

# Bridge gaps of at most two consecutive samples by carrying the value
# forward; longer gaps, and leading/trailing NAs (na.rm = FALSE), stay NA.
tmp <- na.approx(tmp, maxgap = 2, method = "constant", na.rm = FALSE)

# Anything still NA is genuine low activity: code it as 0.
tmp <- replace(tmp, is.na(tmp), 0)

# Run-length encode the 0/1 series and zero out every run shorter than
# six samples (i.e. shorter than one minute).
tmp <- rle(tmp)
tmp$values <- tmp$values * (tmp$lengths >= 6)

# Number the surviving runs 1, 2, ...: the cumulative sum counts the runs
# seen so far, and the multiplication keeps the non-bout runs at 0.
tmp$values <- tmp$values * cumsum(tmp$values)

# Expand the runs back to one value per row (inverse run-length encoding).
count.bouts[["bout"]] <- rep.int(tmp$values, tmp$lengths)
#   time.stamp count bout
#1    13:00:00     5    0
#2    13:00:10    11    0
#3    13:00:20    16    0
#4    13:00:30    19    0
#5    13:00:40    15    0
#6    13:00:50    11    0
#7    13:01:00     8    0
#8    13:01:10     5    0
#9    13:01:20     2    0
#10   13:01:30     6    0
#11   13:01:40    12    1
#12   13:01:50    15    1
#13   13:02:00    20    1
#14   13:02:10    12    1
#15   13:02:20     6    1
#16   13:02:30     2    1
#17   13:02:40    18    1
#18   13:02:50    25    1
#19   13:03:00    26    1
#20   13:03:10    15    1
#21   13:03:20    13    1
#22   13:03:30     6    0
#23   13:03:40     5    0
#24   13:03:50     4    0
#25   13:04:00     8    0
#26   13:04:10     9    0
#27   13:04:20    16    2
#28   13:04:30    26    2
#29   13:04:30    29    2
#30   13:04:40    55    2
#31   13:04:50    21    2
#32   13:05:00     6    2
#33   13:05:10     9    2
#34   13:05:20    28    2
#35   13:05:30    16    2
#36   13:05:40    19    2
#37   13:05:50    26    2
#38   13:06:00     5    0