我有以下数据框:
Date_from <- c("2013-01-01","2013-01-04")
Date_to <- c("2013-01-03","2013-01-06")
Parameter <- c("Par1","Par1","Par2","Par2")
conc<-c("1.5","2.5","1.5","1.8")
metals<-data.frame(Date_from,Date_to,Parameter,conc)
metals$Date_from<-as.Date(metals$Date_from)
metals$Date_to<-as.Date(metals$Date_to)
metals$conc<-as.numeric(as.character(metals$conc))
我需要做的是为每个参数提取每个日期范围内的日期,并将浓度值分配给该范围内的每个日期,并将所有这些信息放入新的数据框中。结果应如下所示:
Date Parameter conc
2013-01-01 Par1 1.5
2013-01-02 Par1 1.5
2013-01-03 Par1 1.5
2013-01-04 Par1 2.5
2013-01-05 Par1 2.5
2013-01-06 Par1 2.5
2013-01-01 Par2 1.5
2013-01-02 Par2 1.5
2013-01-03 Par2 1.5
2013-01-04 Par2 1.8
2013-01-05 Par2 1.8
2013-01-06 Par2 1.8
答案 0 :(得分:3)
这是list
的一个选项。通过将{Date_from'到{Date_to'(seq
)的map
来创建select
列,删除不需要的列(unnest
)和library(tidyverse)
metals %>%
mutate(Date = map2(Date_from, Date_to, seq, by = "1 day")) %>%
select(-Date_from, -Date_to) %>%
unnest %>%
select(Date, Parameter, conc)
# Date Parameter conc
#1 2013-01-01 Par1 1.5
#2 2013-01-02 Par1 1.5
#3 2013-01-03 Par1 1.5
#4 2013-01-04 Par1 2.5
#5 2013-01-05 Par1 2.5
#6 2013-01-06 Par1 2.5
#7 2013-01-01 Par2 1.5
#8 2013-01-02 Par2 1.5
#9 2013-01-03 Par2 1.5
#10 2013-01-04 Par2 1.8
#11 2013-01-05 Par2 1.8
#12 2013-01-06 Par2 1.8
base R
或者可以通过lst <- Map(seq, MoreArgs = list(by = "1 day"), metals$Date_from, metals$Date_to)
out <- cbind(Date = do.call(c, lst), metals[rep(seq_len(nrow(metals)),
lengths(lst)), c("Parameter", "conc")])
row.names(out) <- NULL
out
# Date Parameter conc
#1 2013-01-01 Par1 1.5
#2 2013-01-02 Par1 1.5
#3 2013-01-03 Par1 1.5
#4 2013-01-04 Par1 2.5
#5 2013-01-05 Par1 2.5
#6 2013-01-06 Par1 2.5
#7 2013-01-01 Par2 1.5
#8 2013-01-02 Par2 1.5
#9 2013-01-03 Par2 1.5
#10 2013-01-04 Par2 1.8
#11 2013-01-05 Par2 1.8
#12 2013-01-06 Par2 1.8
try {} catch() {}
答案 1 :(得分:2)
我们可以在不依赖57个软件包的情况下做到这一点:
metals <- data.frame(Date_from,Date_to,Parameter,conc)
do.call(
rbind.data.frame,
lapply(1:nrow(metals), function(.i) {
data.frame(
Date = seq(as.Date(metals$Date_from[.i]), as.Date(metals$Date_to[.i]), "1 day"),
Parameter = metals$Parameter[.i],
conc = as.double(as.character(metals$conc[.i])),
stringsAsFactors = FALSE
)
})
)
使用来自OP的经过预类型转换的数据帧:
library(microbenchmark)
microbenchmark(
base = do.call(
rbind.data.frame,
lapply(1:nrow(metals), function(.i) {
data.frame(
Date = seq(metals$Date_from[.i], metals$Date_to[.i], "1 day"),
Parameter = metals$Parameter[.i],
conc = metals$conc[.i],
stringsAsFactors = FALSE
)
})
),
base2 = {
lst <- Map(
seq, MoreArgs = list(by = "1 day"), metals$Date_from, metals$Date_to
)
cbind(
Date = do.call(c, lst),
metals[rep(seq_len(nrow(metals)), lengths(lst)), c("Parameter", "conc")]
)
},
tidy = metals %>%
mutate(Date = map2(Date_from, Date_to, seq, by = "1 day")) %>%
select(-Date_from, -Date_to) %>%
unnest %>%
select(Date, Parameter, conc)
)
## Unit: microseconds
## expr min lq mean median uq max neval
## base 2472.997 2615.7025 2758.6086 2678.6220 2765.6375 8085.012 100
## base2 716.680 784.0505 835.0233 815.9715 869.8095 1166.096 100
## tidy 7331.729 7671.4065 8644.6002 7889.7080 8080.5925 82376.963 100