让我们创建示例数据:
# Sample data for the question.
# `df` holds, per date, the two letters (X1, X2) to look up.
df <- data.frame(
  date = sprintf("2017-01-%02d", 1:5),
  X1 = c("A", "B", "C", "D", "F"),
  X2 = c("B", "A", "D", "F", "C")
)

# `df2` holds, per date, one column per letter; the column NAMES are the
# lookup keys. Values are stored as strings, exactly as in the question.
df2 <- data.frame(
  date = sprintf("2017-01-%02d", 1:5),
  A = as.character(c(3, 4, 2, 1, 5)),
  B = as.character(c(6, 2, 5, 1, 1)),
  C = as.character(c(1, 4, 5, 2, 3)),
  D = as.character(c(67, 67, 63, 61, 62)),
  F = as.character(c(31, 33, 35, 31, 38))
)
所以我有两个数据框,我希望按日期以及X1和X2的取值,把df2中的值匹配到df中,并据此创建新的变量。对我来说棘手的地方在于:df2中用于匹配的值位于列名(colnames)中。最终结果应如下所示:
> result
date X1 X2 Var1 Var2
1 2017-01-01 A B 3 6
2 2017-01-02 B A 2 4
3 2017-01-03 C D 5 63
4 2017-01-04 D F 61 31
5 2017-01-05 F C 38 3
# Expected output: the original df columns plus Var1/Var2, i.e. the df2 value
# found in the column named by X1 (resp. X2) on the same date.
# Values are kept as strings, matching the question's data.
result <- data.frame(
  date = sprintf("2017-01-%02d", 1:5),
  X1 = c("A", "B", "C", "D", "F"),
  X2 = c("B", "A", "D", "F", "C"),
  Var1 = as.character(c(3, 2, 5, 61, 38)),
  Var2 = as.character(c(6, 4, 63, 31, 3))
)
我想过使用mapvalues,但没能弄明白该怎么用。第二个想法是先把df2转换为长格式(melt),然后再进行匹配,但也失败了。
好的,这是我目前最好的尝试;只是觉得,如果必须为数据框创建很多(> 50)个新变量,应该有更高效的方法。
# Reshape df2 to long format: one row per (date, letter) pair, with the letter
# in `variable` and the corresponding number in `value`.
df2.long <- melt(df2, id.vars = c("date"))

# Return, for every row of df, the df2 value found on that row's date in the
# column named by `keys` (a vector with one letter per row of df).
#
# The result is extracted by column NAME and re-ordered by an explicit row id,
# so it does not depend on column positions in the merged table or on merge()
# happening to sort its output in the same order as df. Rows of df without a
# match get NA. Assumes each (date, letter) pair occurs at most once in
# df2.long, i.e. dates are unique in df2.
lookup_value <- function(keys) {
  wanted <- data.frame(
    row_id = seq_len(nrow(df)),
    date = df$date,
    variable = keys
  )
  merged <- merge(wanted, df2.long, by = c("date", "variable"), all.x = TRUE)
  # merge() may reorder rows; restore the original order of df.
  merged$value[order(merged$row_id)]
}

df$Var1 <- lookup_value(df$X1)
df$Var2 <- lookup_value(df$X2)
答案 0 :(得分:4)
使用dplyr和tidyr:
library(dplyr)
library(tidyr)

# Reshape df2 to long format: one row per (date, letter) pair. The former
# column names go into `X1` (so the table can be joined on df's X1 directly)
# and the cell values into `var`. No grouping is needed for the reshape.
df2_m <- pivot_longer(df2, cols = -date, names_to = "X1", values_to = "var")

# Join the long table twice: first on (date, X1), then on (date, X2).
# Both joins bring in a column called `var`, which dplyr disambiguates as
# var.x / var.y; these are renamed to the final Var1 / Var2.
result <- df %>%
  left_join(df2_m, by = c("date", "X1")) %>%
  left_join(df2_m, by = c("date", "X2" = "X1")) %>%
  rename(Var1 = var.x, Var2 = var.y)
答案 1 :(得分:3)
使用data.table:先对两个表分别melt,再连接(join),最后dcast
library(data.table) # v>=1.10.0

# Long form of the first table, keeping column 1 (date) as the id:
# one row per (date, X column), with the letter in `value`.
# Note: setDT() converts df to a data.table by reference.
long_keys <- melt(setDT(df), id.vars = 1L)

# Long form of the second table, keeping column 1 (date) as the id:
# one row per (date, letter column), with the number in `value`.
# Note: setDT() converts df2 to a data.table by reference.
long_vals <- melt(setDT(df2), id.vars = 1L)

# Join by date and by letter (the letter is `value` in the first table and
# `variable` in the second); nomatch = 0L drops everything that did not match.
joined <- long_keys[long_vals, on = .(date, value = variable), nomatch = 0L]

# Reshape to wide format: one row per date, with one column per X column for
# both the letters (`value`) and the looked-up numbers (`i.value`).
dcast(joined, date ~ variable, value.var = c("value", "i.value"))
# date value_X1 value_X2 i.value_X1 i.value_X2
# 1: 2017-01-01 A B 3 6
# 2: 2017-01-02 B A 2 4
# 3: 2017-01-03 C D 5 63
# 4: 2017-01-04 D F 61 31
# 5: 2017-01-05 F C 38 3
答案 2 :(得分:2)
我们可以使用match,根据'X1'和'X2'列的取值得到它们在'df2'中对应的列索引,再用cbind把它与行序号组合成行/列索引,据此提取'df2'中的值,并将结果赋值以创建'Var'列。
答案 3 :(得分:2)
使用mapply的一种可能做法:
# Fetch the single df2 cell on the row whose date equals `day`, in the column
# whose name is given by `col`.
lookup_df2 <- function(day, col) {
  df2[df2$date == day, as.character(col)]
}

# Walk df row by row, pairing each date with its X1 (resp. X2) letter.
df$Var1 <- mapply(lookup_df2, day = df$date, col = df$X1)
df$Var2 <- mapply(lookup_df2, day = df$date, col = df$X2)
df
# date X1 X2 Var1 Var2
#1 2017-01-01 A B 3 6
#2 2017-01-02 B A 2 4
#3 2017-01-03 C D 5 63
#4 2017-01-04 D F 61 31
#5 2017-01-05 F C 38 3
<strong>NB:</strong>
如果您要处理更多列(不仅仅是示例中的2个),则可以使用lapply循环遍历各个X列:
# Source columns holding the letters, and the new columns to create from them.
x_cols <- paste0("X", 1:2)
var_cols <- paste0("Var", 1:2)

# For each X column, walk df row by row and fetch the df2 cell on that row's
# date, in the column named by that row's letter.
df[, var_cols] <- lapply(df[, x_cols], function(value) {
  mapply(
    function(day, col) df2[df2$date == day, as.character(col)],
    day = df$date,
    col = value
  )
})
答案 4 :(得分:1)
使用melt和match:
# Reshape df2 to long format: one row per (date, letter) pair, with the letter
# in `variable` and the number in `value`. Using id.vars = "date" melts every
# other column, so the letter columns need not be listed by hand, and the
# argument name is spelled out in full instead of relying on partial matching.
df2l <- melt(df2, id.vars = "date")

# Composite "date letter" key for every row of the long table; built once and
# reused for both lookups.
long_key <- paste(df2l$date, df2l$variable)

# Position in df2l of each (date, X1) pair of df; NA where there is no match.
Indices <- match(paste(df$date, df$X1), long_key)
df$Var1 <- df2l$value[Indices]

# Same lookup for the (date, X2) pairs.
Indices2 <- match(paste(df$date, df$X2), long_key)
df$Var2 <- df2l$value[Indices2]