我是 R 的新手,如果这个问题太基础,请见谅。我有一个数据框(df),包含约 12,000 天的河流水位数据。我已经筛选出水位大于 28 英尺的记录,现在想把筛选后的数据框拆分成一个列表:把水位连续 x 天高于 28 英尺的日期归为一组。例如,如果水位连续 20 天高于 28 英尺,我希望把这 20 天分到同一组。
library(dplyr)
# Load the raw export; the path is hard-coded to one particular machine.
RawData <- read.csv("c:/Users/Anthony/Desktop/R/CSVRiverData.csv")

# Keep two columns: the row labels parsed as month/day/year dates, and the
# averaged height reading.
RiverData <- data.frame(
  Date = as.Date(RawData$Row.Labels, format = "%m/%d/%Y"),
  RiverHeight = RawData$Average.of.height
)

# Rows whose height is above 28.
Filt_River_Data <- filter(RiverData, RiverHeight > 28)

# Difference between each retained date and the date in the preceding retained
# row; a value other than one day marks a break between runs.
Date_Diff <- data.frame(Filt_River_Data$Date - lag(Filt_River_Data$Date, 1L))
答案 0 :(得分:0)
这是我用来计算连续天数并据此分组的“蛮力”方法。也许存在更优雅的解决方案,但用循环来实现是一种简单直接的办法。
# Build a synthetic daily series to stand in for the real river record.
library(forecast)

# 1010 normal draws centred on 28 (sd 10), smoothed with an order-10 moving
# average.
rd <- forecast::ma(rnorm(1010, mean = 28, sd = 10), order = 10)
# Drop whatever NA values the smoothing produced.
rd <- rd[!is.na(rd)]

# One calendar day per remaining value, starting 2000-01-01.
temp.dd <- seq(as.Date("2000-01-01"), by = 1, length.out = length(rd))
RiverData <- data.frame(Date = temp.dd, RiverHeight = rd)
# Add a running count of consecutive days above the threshold.
## The count is computed on the raw daily data, not on a pre-filtered subset,
## so it drops back to 0 on every day that is not above the threshold.
h_thresh <- 28

# TRUE where the height is strictly above the threshold. The strict `>` matches
# the height condition used when the data is subset later in this script. A
# missing height is treated as "not above", so it ends the current run instead
# of raising an error.
is_above <- !is.na(RiverData$RiverHeight) & RiverData$RiverHeight > h_thresh

# Vectorised run length (also valid for a zero-row data frame):
# `days_above` is the number of above-threshold days seen so far, and
# cummax(days_above * !is_above) is that same total as of the most recent day
# that was not above. Their difference is the length of the current run.
days_above <- cumsum(is_above)
RiverData$numConDays <- days_above - cummax(days_above * !is_above)
# Keep the days that are above the threshold and already more than 20 days
# into an unbroken run.
## The numConDays condition is expected to imply the height condition, so the
## height test is only a safeguard.
RiverDataSubset <- RiverData[
  with(RiverData, RiverHeight > h_thresh & numConDays > 20),
]
head(RiverDataSubset)
# Label each stretch of consecutive calendar days with its own group number.
## Assumes daily data with rows in date order.
# Written without an index loop so that a subset with zero rows or a single
# row is handled: `2:nrow(...)` would count backwards in those cases.
day_number <- as.numeric(RiverDataSubset$Date)
day_gap <- diff(day_number)

# The first row always opens a group; after that a new group starts wherever
# the step from the previous row is not exactly one day (or cannot be computed
# because a date is missing). Indexing by seq_along() trims the leading TRUE
# away when there are no rows at all.
starts_group <- c(TRUE, is.na(day_gap) | day_gap != 1)[seq_along(day_number)]
RiverDataSubset$group <- cumsum(starts_group)