我有以下两个数据框(data frame):
library(reshape2)
# y1: measured concentrations, one row per sampling window.
# The raw vectors stay in the workspace because later snippets reuse them.
Date_from <- c("2013-01-01", "2013-01-06")
Date_to <- c("2013-01-05", "2013-01-08")
conc1 <- c("1.5", "2.5")
conc2 <- c("2", "3")
y1 <- data.frame(
  Date_from = as.Date(Date_from),
  Date_to = as.Date(Date_to),
  conc1,
  conc2
)
# Long format: one row per (window, metal); the readings arrive as text,
# so convert them to numbers afterwards.
y1 <- melt(y1, id.vars = c("Date_from", "Date_to"))
y1[["value"]] <- as.numeric(y1[["value"]])
# y2: simulated daily concentrations for 2013-01-01 .. 2013-01-08.
# The raw vectors stay in the workspace, as in the y1 setup.
Date <- format(
  seq(as.Date("2013-01-01"), as.Date("2013-01-08"), by = "day"),
  "%Y-%m-%d"
)
conc1 <- c("1.5", "2.5", "1.5", "3.5", "1.5", "2.5", "1.5", "3.5")
conc2 <- c("2.6", "2.8", "1.4", "3.7", "2.6", "2.8", "1.4", "3.7")
# Concentrations are kept as text here; only the date column is converted.
y2 <- data.frame(Date = as.Date(Date), conc1, conc2)
y1 包含在若干日期范围内测得的两种金属的浓度,y2 包含这两种金属的模拟每日浓度。我需要按 y1 中的每个日期范围计算 y2 中对应的平均浓度,并把结果添加到 y1 中。
结果应如下所示:
Date_from Date_to variable value new_value
2013-01-01 2013-01-05 conc1 1.5 2.1
2013-01-06 2013-01-08 conc1 2.5 2.5
2013-01-01 2013-01-05 conc2 2.0 2.62
2013-01-06 2013-01-08 conc2 3.0 2.63
其中 “new_value” 列包含相应日期范围内 y2 的 conc1 和 conc2 的平均值。
更新:
我尝试以下操作均未成功:
# Attempt from the question (reported as not working); kept as posted.
# Reshape the daily table to long format: one row per (Date, metal).
y2_melt<-melt(y2, id="Date")
y2_melt$value<-as.numeric(y2_melt$value)
# NOTE(review): no `by` is given, so merge() joins on the column names the two
# tables share -- here that looks like `variable` AND `value`, i.e. a daily row
# only matches a window whose measured value equals the simulated value.
# Joining on `variable` alone is probably what was intended -- confirm.
helper <- merge(y2_melt,y1)
# Keep only the days that fall inside their window (both bounds inclusive).
helper <- helper[helper$Date >= helper$Date_from & helper$Date <= helper$Date_to, ]
# NOTE(review): this aggregates the metal label (`variable`), not the numeric
# `value` column, and uses "sum" although the goal stated in the question is a
# mean; the grouping also leaves out `variable`, so both metals would be pooled.
final1<-aggregate(helper$variable,
list(Date_from = helper$Date_from,
Date_to = helper$Date_to),
FUN="sum")
# NOTE(review): merged with y2 (the daily table) rather than y1; final1 and y2
# appear to share no column name, in which case merge() returns every
# combination of rows -- verify.
final2<-merge(final1,y2)
# NOTE(review): arrange() is not provided by reshape2, the only package loaded
# in the question; presumably dplyr or plyr has to be attached -- confirm.
final3<-arrange(final2,Date_from,Date_to)
答案 0 :(得分:1)
下面给出一段可以直接运行的代码,并解释每一步的做法。
不过,建议您把它封装成函数并进一步整理。
要理解这段代码,您只需要了解 seq() 函数和 dplyr 包中的 filter() 函数:
library(dplyr)
## Mean of the simulated daily values (y2) inside every measurement window.
## The windows are the Date_from / Date_to vectors created with the example
## data; melt() comes from reshape2, which is loaded there as well.

# Mean of one y2 column over the i-th window.
window_mean <- function(i, column) {
  ### (1) Get the sequence between Date(From-To)
  SequenceDates <- seq(as.Date(Date_from[i]), as.Date(Date_to[i]), by = "days")
  ### (2) Filter the Data by the intersect of Dates in SequenceDates
  ###     (`Date` is the y2 column; no `y2$` needed inside filter())
  result <- filter(y2, Date %in% SequenceDates)
  ### (3) Calculate mean
  ###     as.character() first, because the column may be text or a factor
  mean(as.numeric(as.character(result[[column]])))
}

# Build each column as a whole vector. Filling a data frame row by row with
# c(<dates>, <means>) would coerce everything, the means included, to character.
windows <- seq_along(Date_from)
meanres <- data.frame(
  Date_from = as.Date(Date_from),
  Date_to = as.Date(Date_to),
  newconc1 = vapply(windows, window_mean, numeric(1), column = "conc1"),
  newconc2 = vapply(windows, window_mean, numeric(1), column = "conc2")
)

# Putting it as desired
Dataframe <- melt(meanres, id.vars = c("Date_from", "Date_to"))
colnames(Dataframe) <- c("Date_from", "Date_to", "variable", "newvalue")
Dataframe
#    Date_from    Date_to variable newvalue
# 1 2013-01-01 2013-01-05 newconc1 2.100000
# 2 2013-01-06 2013-01-08 newconc1 2.500000
# 3 2013-01-01 2013-01-05 newconc2 2.620000
# 4 2013-01-06 2013-01-08 newconc2 2.633333
现在只差把原来的测量值(旧值)合并进来了 ;)。希望这对您有用!如有疑问,欢迎随时提问。
答案 1 :(得分:1)
下面是另一种做法:先对 y1 和 y2 进行重塑(reshape),再合并:
library(data.table)
library(dplyr)
# with y1
# Expand every y1 row (one window of one metal) into one row per day, so it
# can be joined to the daily values in y2 on Date + variable.
# seq_len() instead of 1:nrow() keeps the grouping valid for an empty y1.
y1 <- setDT(y1)[, .(Date_from = Date_from,
                    Date_to = Date_to,
                    Date = seq(Date_from, Date_to, by = "days"),
                    variable = variable,
                    value = value),
                by = seq_len(nrow(y1))]
# with y2
y2 <- melt(y2, id.vars = "Date") # apply melt to y2 too
# the simulated values are stored as text; convert them to numbers
y2$value <- as.numeric(as.character(y2$value))
# merge y2 and y1
# inner_join: a simulated day that lies in no measurement window is dropped
# instead of ending up in a group whose Date_from / Date_to are NA.
df <- inner_join(y2, y1, by = c("Date", "variable"), suffix = c("_y2", "_y1"))
str(df) # check df
# Mean of value_y2 per window and metal. value_y1 is constant inside a group,
# so its first element is carried along as the measured `value`.
df %>%
  group_by(Date_from, Date_to, variable) %>%
  summarise(value = value_y1[1], new_value = mean(value_y2)) %>%
  ungroup()
# expected result (4 rows, 5 columns):
#   Date_from  Date_to    variable value new_value
# 1 2013-01-01 2013-01-05 conc1      1.5  2.100000
# 2 2013-01-01 2013-01-05 conc2      2.0  2.620000
# 3 2013-01-06 2013-01-08 conc1      2.5  2.500000
# 4 2013-01-06 2013-01-08 conc2      3.0  2.633333