First  Last     C_ID   OrgName  O_ID  Program    Role_Primary  Role_Comms  Role_Signatory
John   Smith    10045  Acme     901   Buildings  X
John   Smith    10045  Acme     901   Buildings                X
John   Smith    10045  Acme     901   Homes      X
Teddy  Bush     10046  Acme     901   Buildings  X
Teddy  Bush     10046  Acme     901   Buildings                            X
Jess   Clinton  10050  Consult  904   Homes                                X
Jess   Clinton  10050  Consult  904   Homes      X
Jess   Clinton  10050  Consult  904   Homes                    X
First  Last     C_ID   OrgName  O_ID  Program    Role_Primary  Role_Comms  Role_Signatory
John   Smith    10045  Acme     901   Buildings  X             X
John   Smith    10045  Acme     901   Homes      X
Teddy  Bush     10046  Acme     901   Buildings  X                         X
Jess   Clinton  10050  Consult  904   Homes      X             X           X
# Example input (dput form): 8 rows, one per recorded role assignment.
# Each row identifies a contact (First, Last, C_ID), an organisation
# (OrgName, O_ID) and a Program, and carries an "X" in exactly one of the
# three Role_* columns; the other two Role_* cells are empty strings.
# All text columns are stored as factors, the ID columns as integers.
table1 <- structure(
  list(
    First = structure(
      c(2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L),
      .Label = c("Jess", "John", "Teddy"), class = "factor"
    ),
    Last = structure(
      c(3L, 3L, 3L, 1L, 1L, 2L, 2L, 2L),
      .Label = c("Bush", "Clinton", "Smith"), class = "factor"
    ),
    C_ID = c(10045L, 10045L, 10045L, 10046L, 10046L, 10050L, 10050L, 10050L),
    OrgName = structure(
      c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L),
      .Label = c("Acme", "Consult"), class = "factor"
    ),
    O_ID = c(901L, 901L, 901L, 901L, 901L, 904L, 904L, 904L),
    Program = structure(
      c(1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L),
      .Label = c("Buildings", "Homes"), class = "factor"
    ),
    Role_Primary = structure(
      c(2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L),
      .Label = c("", "X"), class = "factor"
    ),
    Role_Comms = structure(
      c(1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L),
      .Label = c("", "X"), class = "factor"
    ),
    Role_Signatory = structure(
      c(1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L),
      .Label = c("", "X"), class = "factor"
    )
  ),
  .Names = c(
    "First", "Last", "C_ID", "OrgName", "O_ID", "Program",
    "Role_Primary", "Role_Comms", "Role_Signatory"
  ),
  class = "data.frame",
  # Compact automatic row names for 8 rows; the original expression was cut
  # off right after `c(NA,`, leaving the call unterminated.
  row.names = c(NA, -8L)
)
# Desired output (dput form): 4 rows, one per contact / organisation /
# program combination, with every role that combination holds marked "X"
# on the same row. Role_Primary has the single level "X" because every row
# holds that role; the other Role_* columns use "" for "not held".
table2 <- structure(
  list(
    First = structure(
      c(2L, 2L, 3L, 1L),
      .Label = c("Jess", "John", "Teddy"), class = "factor"
    ),
    Last = structure(
      c(3L, 3L, 1L, 2L),
      .Label = c("Bush", "Clinton", "Smith"), class = "factor"
    ),
    C_ID = c(10045L, 10045L, 10046L, 10050L),
    OrgName = structure(
      c(1L, 1L, 1L, 2L),
      .Label = c("Acme", "Consult"), class = "factor"
    ),
    O_ID = c(901L, 901L, 901L, 904L),
    Program = structure(
      c(1L, 2L, 1L, 2L),
      .Label = c("Buildings", "Homes"), class = "factor"
    ),
    Role_Primary = structure(
      c(1L, 1L, 1L, 1L),
      .Label = "X", class = "factor"
    ),
    Role_Comms = structure(
      c(2L, 1L, 1L, 2L),
      .Label = c("", "X"), class = "factor"
    ),
    Role_Signatory = structure(
      c(1L, 1L, 2L, 2L),
      .Label = c("", "X"), class = "factor"
    )
  ),
  .Names = c(
    "First", "Last", "C_ID", "OrgName", "O_ID", "Program",
    "Role_Primary", "Role_Comms", "Role_Signatory"
  ),
  class = "data.frame",
  # Compact automatic row names for 4 rows; the original expression was cut
  # off right after `c(NA,`, leaving the call unterminated.
  row.names = c(NA, -4L)
)
答案 0 :(得分:0)
# Rebuild the example input from inline text; fread() parses a string that
# contains newlines as table data rather than as a file path.
# NOTE(review): the header names 9 columns but each data row below has only
# 7 whitespace-separated fields -- confirm the X marks end up in the intended
# Role_* columns before relying on this input.
df <- data.table::fread("First Last C_ID OrgName O_ID Program Role_Primary Role_Comms Role_Signatory
John Smith 10045 Acme 901 Buildings X
John Smith 10045 Acme 901 Buildings X
John Smith 10045 Acme 901 Homes X
Teddy Bush 10046 Acme 901 Buildings X
Teddy Bush 10046 Acme 901 Buildings X
Jess Clinton 10050 Consult 904 Homes X
Jess Clinton 10050 Consult 904 Homes X
Jess Clinton 10050 Consult 904 Homes X ")
# Collapse the one-role-per-row records into one row per
# contact / organisation / program, with one column per role.
# Requires dplyr and tidyr to be attached (not shown in this snippet).
df %>%
# Wide -> long: stack the three Role_* columns into a Role (column name) /
# Member (cell value) pair.
gather(Role, Member, Role_Primary:Role_Signatory) %>%
# Keep only roles that are actually held: Member is not NA and not blank
# after trimming whitespace.
filter(!is.na(Member) & nchar(trimws(Member))>0) %>%
# Drop exact duplicate rows so each identifier/Role combination appears once
# before spreading back to wide form.
distinct() %>%
# Turn Role into a factor whose levels follow first-appearance order;
# presumably so spread() emits the role columns in that order -- verify.
mutate(Role = factor(Role, unique(Role))) %>%
# Long -> wide: one column per Role, filled with Member; combinations with
# no matching row come out as NA (see the printed result).
spread(Role, Member)
First Last C_ID OrgName O_ID Program Role_Primary Role_Comms Role_Signatory
1 Jess Clinton 10050 Consult 904 Homes X <NA> X
2 John Smith 10045 Acme 901 Buildings X X <NA>
3 John Smith 10045 Acme 901 Homes X <NA> <NA>
4 Teddy Bush 10046 Acme 901 Buildings X <NA> X
请注意,之所以加入 distinct() 这一行,是因为在输入示例中,Jess Clinton 的同一个角色被列出了两次。
答案 1 :(得分:0)
# Build a per-row key by concatenating (with no separator) the contact ID,
# organisation name and program, and store it in a new `uniqueID` column.
df[["uniqueID"]] <- paste0(df[["C_ID"]], df[["OrgName"]], df[["Program"]])
#remove duplicate rows, store as df2