Question

我有一个数据框，其中包含某一时间跨度内的值。我用函数 find_peaks 找出了其中的峰值，并在名为 peak 的列中将它们标记为 TRUE：

test <-
structure(list(year = 1996:2016, value = c(-0.5214506, -0.8037488,
0.1138524, 0.9939848, 1.7027944, 0.6448417, 0.1204489, -1.2254546,
-0.6733273, -0.7457323, 0.4874829, 2.2080809, 2.0609055, -2.5291374,
-1.5272201, 0.3057773, 0.1383523, -0.6455441, -0.8364883, -0.8907073,
-0.7940878), peak = c(FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -21L))

test
# A tibble: 21 x 3
    year  value peak 
   <int>  <dbl> <lgl>
 1  1996 -0.521 FALSE
 2  1997 -0.804 FALSE
 3  1998  0.114 FALSE
 4  1999  0.994 FALSE
 5  2000  1.70  TRUE 
 6  2001  0.645 FALSE
 7  2002  0.120 FALSE
 8  2003 -1.23  FALSE
 9  2004 -0.673 FALSE
10  2005 -0.746 FALSE
11  2006  0.487 FALSE
12  2007  2.21  TRUE 
13  2008  2.06  FALSE
14  2009 -2.53  FALSE
15  2010 -1.53  FALSE
16  2011  0.306 FALSE
17  2012  0.138 FALSE
18  2013 -0.646 FALSE
19  2014 -0.836 FALSE
20  2015 -0.891 FALSE
21  2016 -0.794 FALSE

我需要找出每个峰值之前紧邻的连续非负值（连同峰值本身）。本例中有 2 个峰值，但实际数据中可能更多。期望的结果如下：

# A tibble: 5 x 3
   year value peak 
  <int> <dbl> <lgl>
1  1998 0.114 FALSE
2  1999 0.994 FALSE
3  2000 1.70  TRUE 
4  2006 0.487 FALSE
5  2007 2.21  TRUE 

我尝试过一些方法，但始终没能解决这个问题。如能提供任何帮助，不胜感激。

Answer 1

这应该有效

# Collect every peak row of `test` together with the run of strictly
# positive values that sits directly before it.
#
# Expects `test` to be a data frame (or tibble) with a numeric `value`
# column and a logical `peak` column.
#
# Result: the selected rows of `test`. For each peak the peak row comes
# first, followed by its preceding positive rows from nearest to farthest,
# so the output is NOT in chronological order. If there are no peaks the
# result is a zero-row table with the same columns.

values <- test[["value"]]

# which() ignores NA, so a missing peak flag is treated as "not a peak".
peak_rows <- which(test[["peak"]])

rows_per_peak <- lapply(peak_rows, function(p) {
  m <- p - 1L
  # Walk backwards while still inside the table and the value is positive.
  # The `m >= 1L` guard stops at the first row instead of indexing row 0
  # (which would make the condition zero-length and raise an error);
  # isTRUE() makes an NA value end the run instead of aborting.
  while (m >= 1L && isTRUE(values[m] > 0)) {
    m <- m - 1L
  }
  # `m + 1L` is the farthest row that still belongs to the run (it equals
  # `p` when no positive row precedes the peak), so this yields
  # p, p - 1, ..., m + 1.
  p:(m + 1L)
})

# Subset once instead of growing a list of one-row tables and rbind-ing them.
test[as.integer(unlist(rows_per_peak)), ]

唯一的问题是输出中各行的顺序与您在问题中给出的顺序不一致，我不确定这一点是否重要。

# A tibble: 5 x 3
   year value peak 
  <int> <dbl> <lgl>
1  2000 1.70  TRUE 
2  1999 0.994 FALSE
3  1998 0.114 FALSE
4  2007 2.21  TRUE 
5  2006 0.487 FALSE

Answer 2

library(data.table)
# Turn `test` into a data.table by reference so `:=` can add columns in place.
setDT(test)

# npeak: how many peaks lie at or after each row (reverse cumulative count),
#        so all rows leading up to the same peak share one value.
# pos:   id of the run of consecutive rows sharing the same `value >= 0` status.
test[, c("npeak", "pos") := list(rev(cumsum(rev(peak))), rleid(value >= 0))]

# Within each npeak group, flag the rows that sit in the same run as the peak.
test[, preceding := (pos == pos[peak]), by = .(npeak)]

# Keep the flagged rows with a strictly positive value; show the original columns.
test[preceding & value > 0, list(year, value, peak)]

或更简洁

library(magrittr)

# One-expression variant of the data.table solution.
# - `by`: peak %>% rev %>% cumsum %>% rev is the reverse cumulative count of
#   TRUE values in `peak`, i.e. the number of peaks at or after each row, so
#   the rows leading up to one peak form one group.
# - `j`: rleid(value >= 0) numbers the runs of consecutive rows sharing the
#   same `value >= 0` status; `.[peak]` picks the run id at the group's peak
#   row, and `==` flags the rows belonging to that same run.
# - The chained `[...]` then keeps flagged rows with a strictly positive value.
# NOTE(review): rows after the last peak form a group with no TRUE in `peak`,
# so `.[peak]` is empty there; presumably `preceding` ends up NA for them and
# they are dropped by the filter. Confirm with the data.table version in use.
test[, preceding := rleid(value >= 0) %>% `==`(.[peak])
     , by = peak %>% rev %>% cumsum %>% rev
     ][value > 0 & preceding, .(year, value, peak)]

#    year     value  peak
# 1: 1998 0.1138524 FALSE
# 2: 1999 0.9939848 FALSE
# 3: 2000 1.7027944  TRUE
# 4: 2006 0.4874829 FALSE
# 5: 2007 2.2080809  TRUE

以dplyr语法+ data.table::rleid()重写的解决方案：

library(dplyr)

# Same approach written as named steps. local() keeps the intermediates out
# of the workspace and still returns (and auto-prints) the final table.
local({
  # npeak: number of peaks at or after each row; pos: id of the run of
  # consecutive rows sharing the same `value >= 0` status.
  tagged <- mutate(
    test,
    npeak = rev(cumsum(rev(peak))),
    pos = rleid(value >= 0)
  )

  # Rows after the final peak (npeak == 0) cannot precede any peak.
  before_a_peak <- filter(tagged, npeak != 0)

  # Per peak group: positive rows that share the peak's run.
  by_peak <- group_by(before_a_peak, npeak)
  flagged <- ungroup(
    mutate(by_peak, preceding = value > 0 & pos == pos[peak])
  )

  filter(flagged, preceding == TRUE)
})

# A tibble: 5 x 6
   year value peak  npeak   pos preceding
  <int> <dbl> <lgl> <int> <int> <lgl>    
1  1998 0.114 FALSE     2     2 TRUE     
2  1999 0.994 FALSE     2     2 TRUE     
3  2000 1.70  TRUE      2     2 TRUE     
4  2006 0.487 FALSE     1     4 TRUE     
5  2007 2.21  TRUE      1     4 TRUE

找到局部最大值之前的正值

2 个答案: