根据第二个数据框中的日期范围计算平均浓度

时间:2018-09-07 08:07:01

标签: r

我有以下两个数据框:

library(reshape2)

# y1: measured concentrations of two metals, one row per sampling window.
# The raw vectors stay in the global environment; values are entered as
# strings and converted to numeric after reshaping.
Date_from <- c("2013-01-01", "2013-01-06")
Date_to <- c("2013-01-05", "2013-01-08")
conc1 <- c("1.5", "2.5")
conc2 <- c("2", "3")
y1 <- data.frame(
  Date_from = as.Date(Date_from),
  Date_to = as.Date(Date_to),
  conc1 = conc1,
  conc2 = conc2
)
# Long format: one row per (window, metal) pair.
y1 <- melt(y1, id.vars = c("Date_from", "Date_to"))
y1[["value"]] <- as.numeric(y1[["value"]])

# y2: simulated daily concentrations of both metals, one row per day.
# Values are kept as strings here, exactly as in the raw input.
Date <- as.character(
  seq(as.Date("2013-01-01"), as.Date("2013-01-08"), by = "day")
)
conc1 <- rep(c("1.5", "2.5", "1.5", "3.5"), times = 2)
conc2 <- rep(c("2.6", "2.8", "1.4", "3.7"), times = 2)
y2 <- data.frame(Date = as.Date(Date), conc1 = conc1, conc2 = conc2)

y1 包含在一定日期范围内测得的两种金属的浓度,y2 包含这两种金属的每日模拟浓度值。我需要按照 y1 中的日期范围计算 y2 中对应的平均金属浓度,并将结果添加到 y1 中。

结果应如下所示:

Date_from    Date_to     variable value  new_value
2013-01-01 2013-01-05    conc1     1.5    2.1
2013-01-06 2013-01-08    conc1     2.5    2.5
2013-01-01 2013-01-05    conc2     2.0    2.62
2013-01-06 2013-01-08    conc2     3.0    2.63

其中“new_value”列包含相应日期范围内 y2 中 conc1 和 conc2 的平均值。

更新:

我尝试了以下操作,但没有成功:

# Attempt: reshape y2 to long form, join it to y1, keep the rows whose Date
# falls inside each window, then aggregate per window.
y2_melt<-melt(y2, id="Date")
y2_melt$value<-as.numeric(y2_melt$value)
# NOTE(review): merge() without `by` joins on every shared column name. Here
# y2_melt and y1 share both `variable` and `value`, so rows are also matched
# on the concentration value itself -- probably not intended.
helper <- merge(y2_melt,y1)
# Keep only the daily rows that lie inside the [Date_from, Date_to] window.
helper <- helper[helper$Date >= helper$Date_from & helper$Date <= helper$Date_to, ]
# NOTE(review): this aggregates helper$variable (the metal label) with "sum",
# grouped by window only; an average of helper$value per window and metal
# was presumably intended.
final1<-aggregate(helper$variable,
                     list(Date_from = helper$Date_from,
                          Date_to = helper$Date_to),
                          FUN="sum")
# NOTE(review): final1 and y2 appear to share no column names, in which case
# merge() returns their Cartesian product -- confirm this is not intended.
final2<-merge(final1,y2)
# NOTE(review): arrange() is not a base R function; this assumes dplyr or
# plyr is attached, which the snippet itself does not do.
final3<-arrange(final2,Date_from,Date_to)

2 个答案:

答案 0 :(得分:1)

下面我给出一段可以直接运行的代码,并解释每一步的做法。不过,我建议您把它封装成函数,并写得更规范一些。要理解这段代码,您只需要了解 seq() 函数和 dplyr 包中的 filter() 函数。

library(dplyr)
## Average the daily values in y2 over every measurement window.
## Reads the Date_from / Date_to vectors and the y2 data frame defined in
## the question; filter() comes from dplyr and melt() from reshape2.

## One list slot per window. Each slot holds a one-row data frame, so the
## dates stay dates and the means stay numeric (combining them with c()
## would coerce every value to character).
window_means <- vector("list", length(Date_from))
for (i in seq_along(Date_from)) {
  # for each Date_from in the vector
  ### (1) Get the sequence between Date(From-To)
  ### (2) Filter the Data by the intersect of Dates in SequenceDates
  ### (3) Calculate mean

  SequenceDates <- seq(as.Date(Date_from[i]), as.Date(Date_to[i]), by = "days")
  result <- filter(y2, Date %in% SequenceDates)
  window_means[[i]] <- data.frame(
    Date_from = as.Date(Date_from[i]),
    Date_to = as.Date(Date_to[i]),
    # as.character() first, so the conversion is also right if the
    # concentration columns were read in as factors
    newconc1 = mean(as.numeric(as.character(result$conc1))),
    newconc2 = mean(as.numeric(as.character(result$conc2)))
  )
}
meanres <- do.call(rbind, window_means)

# Putting it as desired: long format, one row per (window, metal)
Dataframe <- melt(meanres,
                  id.vars = c("Date_from", "Date_to"),
                  value.name = "newvalue")
Dataframe

#    Date_from    Date_to variable newvalue
# 1 2013-01-01 2013-01-05 newconc1 2.100000
# 2 2013-01-06 2013-01-08 newconc1 2.500000
# 3 2013-01-01 2013-01-05 newconc2 2.620000
# 4 2013-01-06 2013-01-08 newconc2 2.633333

现在您只差把旧值(value 列)加进去了 ;)。希望这对您有用!如有疑问,请随时提问。

答案 1 :(得分:1)

这是另一种方案,基于对 y1 和 y2 重新整理格式:

library(data.table)
library(dplyr)
# Step 1 - y1: expand every measurement window into one row per day so that
# it can be joined to the daily values in y2. Each original row forms its
# own group (row_id), so seq() sees a single Date_from / Date_to pair.
y1 <- setDT(y1)[, .(Date_from = Date_from,
                    Date_to = Date_to,
                    Date = seq(Date_from, Date_to, by = "days"),
                    variable = variable,
                    value = value),
                by = .(row_id = seq_len(nrow(y1)))]

# Step 2 - y2: reshape to long format too and make the values numeric.
# reshape2::melt is called explicitly: with data.table attached, a bare
# melt() resolves to data.table's generic, while y2 is a plain data.frame.
y2 <- reshape2::melt(y2, id.vars = "Date")
y2$value <- as.numeric(y2$value)

# Step 3 - attach the matching window (and its measured value) to each day.
df <- left_join(y2, y1, by = c("Date", "variable"), suffix = c("_y2", "_y1"))
str(df) # check df

# Step 4 - average the daily y2 values per window and metal. The measured
# value from y1 is kept as a grouping column so the result has both the old
# `value` and the new `new_value`; ungroup() returns a plain table.
df %>%
  group_by(Date_from, Date_to, variable, value = value_y1) %>%
  summarise(new_value = mean(value_y2)) %>%
  ungroup()

# Expected result (new_value rounded to two decimals):
#    Date_from    Date_to variable value new_value
# 1 2013-01-01 2013-01-05    conc1   1.5      2.10
# 2 2013-01-01 2013-01-05    conc2   2.0      2.62
# 3 2013-01-06 2013-01-08    conc1   2.5      2.50
# 4 2013-01-06 2013-01-08    conc2   3.0      2.63