我有如下所示的 data.frame。
# Example input: 12 rows keyed by ID, with repeated IDs and missing values.
DF <- structure(
  list(
    ID = rep(c("k1", "k2", "k3", "k4", "k5"), times = c(2L, 2L, 3L, 2L, 3L)),
    g1 = rep(c(NA, "robin", "norse", "spidey"), times = c(4L, 3L, 2L, 3L)),
    g2 = c(
      "olsen", "olsen", "lane", "lang", "damien", "jason", "dick",
      NA, NA, "peter", "miles", "ben"
    ),
    g3 = c(
      rep(NA, 4L), "wayne", "todd", "grayson", "Masterson", "odinson",
      "616", "ultimate", "clone"
    )
  ),
  # Compact integer specification of the row names for the 12 rows.
  row.names = c(NA, 12L),
  class = "data.frame"
)
DF
ID g1 g2 g3
1: k1 NA olsen NA
2: k1 NA olsen NA
3: k2 NA lane NA
4: k2 NA lang NA
5: k3 robin damien wayne
6: k3 robin jason todd
7: k3 robin dick grayson
8: k4 norse NA Masterson
9: k4 norse NA odinson
10: k5 spidey peter 616
11: k5 spidey miles ultimate
12: k5 spidey ben clone
如何根据键列 ID 合并重复的记录,并在同一 ID 下各记录的数据不同时用 ":" 将它们拼接起来,
以得到如下结果?
# Expected result: one row per ID; differing values within an ID are joined
# with ":". Missing entries are the literal string "NA", not NA_character_.
out <- data.frame(
  ID = c("k1", "k2", "k3", "k4", "k5"),
  g1 = c("NA", "NA", "robin", "norse", "spidey"),
  g2 = c("olsen", "lane:lang", "damien:jason:dick", "NA", "peter:miles:ben"),
  g3 = c(
    "NA", "NA", "wayne:todd:grayson", "Masterson:odinson",
    "616:ultimate:clone"
  ),
  stringsAsFactors = FALSE
)
out
ID g1 g2 g3
1 k1 NA olsen NA
2 k2 NA lane:lang NA
3 k3 robin damien:jason:dick wayne:todd:grayson
4 k4 norse NA Masterson:odinson
5 k5 spidey peter:miles:ben 616:ultimate:clone
答案 0 :(得分:5)
使用 data.table 的解决方案。
library(data.table)
创建data.table
# Convert the input data.frame to a data.table, leaving DF itself unchanged.
DT <- data.table::as.data.table(DF)
合并重复的记录
# For each ID, reduce every other column to its distinct values (in order of
# first appearance) joined by ":". paste() renders NA as the text "NA".
DT[
  ,
  lapply(.SD, function(values) {
    distinct_values <- unique(values)
    paste(distinct_values, collapse = ":")
  }),
  by = "ID"
]
答案 1 :(得分:3)
使用dplyr
library(dplyr)
# For each ID, collapse the distinct values of every non-grouping column into
# a single ":"-separated string; paste() renders NA as the text "NA".
# summarise_each() and funs() are deprecated in dplyr, so the per-column
# summary is expressed with summarise(across(...)) instead.
DF %>%
  group_by(ID) %>%
  summarise(
    across(everything(), function(x) paste(unique(x), collapse = ":"))
  )
# ID g1 g2 g3
#1 k1 NA olsen NA
#2 k2 NA lane:lang NA
#3 k3 robin damien:jason:dick wayne:todd:grayson
#4 k4 norse NA Masterson:odinson
#5 k5 spidey peter:miles:ben 616:ultimate:clone