我有两个相当大的数据文件,需要通过以下方式合并为一个文件:
# Person table: one row per id.
A <- tibble(
  id = 1:2,
  firstName = c("Alice", "Bob")
)
# Contact table in long format: one row per e-mail address, so an id may
# appear several times. Ids are written as integers so the join key has the
# same type as A$id.
B <- tibble(
  id = c(1L, 1L, 2L),
  email = c(
    "alice@wonderland.com",
    "alice2@wonderland.com",
    "bob@builder.com"
  )
)
# Target shape: one row per id, with the e-mails of that id spread across
# numbered columns (email1, email2, ...) and NA where an id has fewer e-mails.
desiredResult <- tibble(
  id = 1:2,
  firstName = c("Alice", "Bob"),
  email1 = c("alice@wonderland.com", "bob@builder.com"),
  email2 = c("alice2@wonderland.com", NA)
)
如何高效地做到这一点?我尝试过使用 spread(),但没有成功,只能勉强拼凑出一个糟糕的解决方案:
# Work-around from the question: it reaches the wanted layout only by
# flattening every group to character values and gluing columns back together.
# Kept as an illustration of what the question is trying to avoid.
notGood <-
  inner_join(A, B, by = "id") %>%
  # One data frame per id.
  split(., .$id) %>%
  # Flatten each group into a single row. unlist() appends 1, 2, ... to the
  # column names only when the group has more than one row, so single-row
  # groups keep the bare names (id, firstName, email).
  # as_tibble() is used instead of the deprecated as.tibble().
  map_dfr(function(x) as_tibble(t(unlist(x)))) %>%
  # Columns missing from a group are NA after row-binding; blank them so the
  # unite() calls below concatenate "value" + "" rather than "value" + "NA".
  replace(is.na(.), "") %>%
  # Merge the suffixed and bare variant of each column back into one.
  unite(id, id1, id, sep = "") %>%
  unite(firstName, firstName1, firstName, sep = "") %>%
  unite(email, email1, email, sep = "") %>%
  # Drop the leftover duplicates (id2, firstName2); keep every e-mail column.
  select(id, firstName, matches("email"))
编辑:
建议的解决方案效果很好,但是如何将它们应用于多列?例如下面这个例子:
# Person table: one row per id.
A <- tibble(
  id = 1:2,
  firstName = c("Alice", "Bob")
)
# Contact table in long format with two value columns (email, phone); an id
# may appear several times. Ids are written as integers so the join key has
# the same type as A$id.
B <- tibble(
  id = c(1L, 1L, 2L),
  email = c(
    "alice@wonderland.com",
    "alice2@wonderland.com",
    "bob@builder.com"
  ),
  phone = c("123", "456", "789")
)
# Target shape: one row per id, with each value column spread across numbered
# columns (email1, email2, phone1, phone2, ...) and NA where an id has fewer
# entries.
desiredResult <- tibble(
  id = 1:2,
  firstName = c("Alice", "Bob"),
  email1 = c("alice@wonderland.com", "bob@builder.com"),
  email2 = c("alice2@wonderland.com", NA),
  phone1 = c("123", "789"),
  phone2 = c("456", NA)
)
仅在建议的答案中添加更多列名是行不通的:
# Attempt that does NOT produce the desired result. Each spread() treats all
# remaining columns as row identifiers: during the first spread the phone/rn2
# columns still differ between the rows of one id, and during the second the
# email1/email2 columns do, so the rows of an id never collapse into one.
A %>%
  left_join(B, by = "id") %>%
  group_by(id) %>%
  mutate(
    rn = paste0("email", row_number()),
    rn2 = paste0("phone", row_number())
  ) %>%
  spread(rn, email) %>%
  spread(rn2, phone)
答案 0 :(得分:2)
检查此解决方案:
# Solution for a single value column (email).
# Number the e-mails within each id, turn those labels into columns, then
# attach the person data from A.
B %>%
  group_by(id) %>%
  # Per-id running label: email1, email2, ...
  mutate(rn = paste0("email", row_number())) %>%
  # Grouping is only needed for row_number(); drop it so the result is not
  # left grouped.
  ungroup() %>%
  spread(rn, email) %>%
  # Explicit key: avoids the "Joining, by = ..." message and accidental joins
  # on other shared column names.
  right_join(A, by = "id") %>%
  # Put the identifying columns first.
  select(id, firstName, everything())
# Solution for several value columns (email, phone, ...).
# Stack all non-id columns of B into key/value pairs, number the values within
# each (id, key), and spread the numbered keys back out into columns.
# Note: gather() puts all values into one column, so the value columns must
# share (or be coercible to) a common type; here they are all character.
A %>%
  left_join(
    B %>%
      gather(key, val, -id) %>%
      group_by(id, key) %>%
      # email1, email2, ..., phone1, phone2, ...
      mutate(key2 = paste0(key, row_number())) %>%
      ungroup() %>%
      # The un-numbered key would otherwise act as an extra row identifier in
      # spread() and keep the rows of one id apart.
      select(-key) %>%
      spread(key2, val),
    # Explicit key: avoids the "Joining, by = ..." message.
    by = "id"
  )
答案 1 :(得分:1)
# Widen B so that each id has one row with columns email1, email2, ..., then
# keep the ids present in both A and the widened table.
desiredResult <- inner_join(
  A,
  B %>%
    group_by(id) %>%
    # Per-id running label used as the new column name.
    mutate(ColName = paste0("email", row_number())) %>%
    ungroup() %>%
    spread(ColName, email),
  by = "id"
)