如何在 R 中按连续日期把数据分组并生成列表?

时间:2017-10-21 01:28:22

标签: r

我是 R 的新手,如果这个问题太基础,请见谅。我有一个数据框(df),里面有大约 12,000 天的河流水位高度数据。我已经筛选出水位高于 28 英尺的记录,现在想把筛选后的数据框拆分成一个列表:每当河流水位连续 x 天高于 28 英尺时,就把这些天归为一组。例如,如果水位连续 20 天高于 28 英尺,我希望把这 20 天的数据分在同一组。

library(dplyr)

# Read the exported river readings from the CSV file.
RawData <- read.csv("c:/Users/Anthony/Desktop/R/CSVRiverData.csv")

# Build a two-column data frame: the label column parsed as month/day/year
# dates, and the average height column, named Date and RiverHeight.
RiverData <- setNames(
  data.frame(
    as.Date(RawData$Row.Labels, format = "%m/%d/%Y"),
    RawData$Average.of.height
  ),
  c("Date", "RiverHeight")
)

# Keep only the rows whose height is strictly above 28.
Filt_River_Data <- filter(RiverData, RiverHeight > 28)

# Difference between each remaining date and the one in the row before it
# (the first row has no predecessor, so its difference is NA).
Date_Diff <- data.frame(Filt_River_Data$Date - lag(Filt_River_Data$Date, 1L))

1 个答案:

答案 0 :(得分:0)

下面是我的蛮力做法:先计算连续的天数,再据此把数据分组。也许还有更优雅的解法,但用循环来实现是一种简单直接的方式。

# get some reasonable pseudo data
library(forecast)
# Build a reproducible synthetic river-height series for the demonstration.
## Fix the RNG seed so every run produces the same rows (and therefore the
## same runs above the threshold further down the script).
set.seed(42)
rd <- rnorm(1010, mean = 28, sd = 10)
## Centred moving average of order 10 using base R only. For an even order the
## centred average puts half weight on the two end points; these are the same
## weights forecast::ma(rd, order = 10) applies, so this block itself does not
## need the forecast package.
rd <- stats::filter(rd, c(0.5, rep(1, 9), 0.5) / 10, sides = 2)
## The two-sided filter leaves NA at both ends of the series; drop them and
## return to a plain numeric vector.
rd <- as.numeric(rd[!is.na(rd)])
## One calendar day per remaining observation, starting 2000-01-01.
temp.dd <- seq.Date(as.Date("2000-01-01"), by = "day", length.out = length(rd))
RiverData <- data.frame(Date = temp.dd, RiverHeight = rd)

# Add a column holding the running count of consecutive days above threshold.
## Computed on the raw (unfiltered) series so that every break in a run is seen.
h_thresh <- 28
## TRUE where the height is strictly above the threshold. Strict ">" matches the
## comparison used when the data is subset later in this script. A missing
## height counts as "not above", so it ends the current run instead of
## raising an error.
is_above <- !is.na(RiverData$RiverHeight) & RiverData$RiverHeight > h_thresh
day_index <- seq_along(is_above)
## Row index of the most recent day that was not above the threshold
## (0 while no such day has been seen yet).
last_not_above <- cummax(day_index * (!is_above))
## Days elapsed since that reset: 0 on a day not above the threshold, then
## 1, 2, 3, ... through each run. Also works for a data frame with zero rows.
RiverData$numConDays <- day_index - last_not_above

# Select the rows that sit deep inside a long run above the threshold.
## numConDays > 20 keeps a run only from its 21st consecutive day onward.
## The height comparison is retained as well; it only makes a difference if
## the counting step treats a reading equal to the threshold as part of a run.
RiverDataSubset <- RiverData[
  with(RiverData, numConDays > 20 & RiverHeight > h_thresh),
]
head(RiverDataSubset)

# Give every stretch of consecutive calendar days its own group number.
## Assumes daily data in date order: a new group starts whenever the gap to the
## previous row is not exactly one day.
subset_dates <- RiverDataSubset$Date
day_gap <- as.numeric(diff(subset_dates), units = "days")
## The first row always opens group 1. Trimming to seq_along() keeps the
## result the right length for an empty subset; a one-row subset simply gets
## group 1 (the index loop this replaces errored in both cases).
starts_group <- c(TRUE, day_gap != 1)[seq_along(subset_dates)]
RiverDataSubset$group <- cumsum(starts_group)
## To obtain a list with one data frame per run, use:
##   split(RiverDataSubset, RiverDataSubset$group)