我正在处理求职申请数据,其中每一份过往的工作经历都是 Excel 文件中的一行。我想转换数据集,使每位申请人只占一行,其过往雇主依次展开为第 1、2、3、4 …… 列。
我认为问题最好用一个例子来解释。如何从开始数据帧到达所需的数据帧?
我尝试过用 melt 和 cast 来重塑数据,但遇到了困难:我不希望为每个不同的公司名称各创建一列,而是希望列的数量取决于每个 id 下雇主的个数。
# Example input in long form: one row per (applicant id, past employer) pair.
id <- c(1000, 1000, 1002, 1007, 1007, 1007, 1007, 1009)
employers <- c(
  "Ikea", "Subway", "DISH", "DISH",
  "Ikea", "Starbucks", "Google", "Google"
)
start_date <- c(
  "2/1/2013", "5/1/2000", "4/1/2012", "3/1/2014",
  "8/15/2011", "4/15/2008", "2/1/2004", "3/15/2010"
)

# Build the data frame from the vectors directly. Wrapping them in cbind()
# first produces a character matrix, which silently turns the numeric id into
# text. Column names come from the argument names, so no colnames<- call is
# needed. stringsAsFactors = FALSE keeps the text columns as character on
# every R version.
start <- data.frame(
  id = id,
  employers = employers,
  start_date = start_date,
  stringsAsFactors = FALSE
)
start
# Target layout in wide form: one row per applicant id, with the k-th employer
# in column empK and its start date in column empK_start. NA marks applicants
# with fewer than k employers.
unique_id <- c(1000, 1002, 1007, 1009)
emp1 <- c("Ikea", "DISH", "DISH", "Google")
emp2 <- c("Subway", NA, "Ikea", NA)
emp3 <- c(NA, NA, "Starbucks", NA)
emp4 <- c(NA, NA, "Google", NA)
emp1_start <- c("2/1/2013", "4/1/2012", "3/1/2014", "3/15/2010")
emp2_start <- c("5/1/2000", NA, "8/15/2011", NA)
emp3_start <- c(NA, NA, "4/15/2008", NA)
emp4_start <- c(NA, NA, "2/1/2004", NA)

# Pass the vectors to data.frame() directly instead of through cbind():
# cbind() would coerce everything, including the numeric unique_id, into one
# character matrix. stringsAsFactors = FALSE keeps text columns as character
# on every R version.
desired <- data.frame(
  unique_id = unique_id,
  emp1 = emp1,
  emp2 = emp2,
  emp3 = emp3,
  emp4 = emp4,
  emp1_start = emp1_start,
  emp2_start = emp2_start,
  emp3_start = emp3_start,
  emp4_start = emp4_start,
  stringsAsFactors = FALSE
)
desired
答案 0 :(得分:0)
使用您的数据(此处有意保留 factor 类型;如不需要,设置 stringsAsFactors = FALSE 即可轻松解决):
# Long-form input used by the answer: one row per (id, employer) pair.
# The columns are deliberately factors. stringsAsFactors = TRUE is spelled out
# because the default changed to FALSE in R 4.0.0; without it the columns
# would be character on current R and no longer match the <fctr> output shown
# below.
start <- data.frame(
  id = c(
    "1000", "1000", "1002", "1007",
    "1007", "1007", "1007", "1009"
  ),
  employers = c(
    "Ikea", "Subway", "DISH", "DISH",
    "Ikea", "Starbucks", "Google", "Google"
  ),
  start_date = c(
    "2/1/2013", "5/1/2000", "4/1/2012", "3/1/2014",
    "8/15/2011", "4/15/2008", "2/1/2004", "3/15/2010"
  ),
  stringsAsFactors = TRUE
)
这对你有用吗?
library(dplyr)
library(tidyr)
# Wide table of employer names. Within each id the rows are numbered 1..n and
# labelled "emp1", "emp2", ...; those labels then become the column names.
# pivot_wider() replaces the superseded spread(). It keeps new columns in
# order of first appearance, so "emp10" follows "emp9".
a <- start %>%
  select(-start_date) %>%
  group_by(id) %>%
  mutate(emp = sprintf("emp%s", seq_len(n()))) %>%
  ungroup() %>%
  pivot_wider(names_from = emp, values_from = employers)

# Same reshaping for the start dates, labelled "emp1_start", "emp2_start", ...
# The within-id numbering uses the same row order as above, so empK_start
# belongs to empK.
b <- start %>%
  select(-employers) %>%
  group_by(id) %>%
  mutate(emp = sprintf("emp%s_start", seq_len(n()))) %>%
  ungroup() %>%
  pivot_wider(names_from = emp, values_from = start_date)

# One row per id: employer columns first, then the matching start dates.
left_join(a, b, by = "id")
# # A tibble: 4 x 9
# id emp1 emp2 emp3 emp4 emp1_start emp2_start emp3_start emp4_start
# <fctr> <fctr> <fctr> <fctr> <fctr> <fctr> <fctr> <fctr> <fctr>
# 1 1000 Ikea Subway NA NA 2/1/2013 5/1/2000 NA NA
# 2 1002 DISH NA NA NA 4/1/2012 NA NA NA
# 3 1007 DISH Ikea Starbucks Google 3/1/2014 8/15/2011 4/15/2008 2/1/2004
# 4 1009 Google NA NA NA 3/15/2010 NA NA NA