我有一份关于职位晋升的数据。对于每个人,我都有他们被公司聘用、晋升和离职的确切日期。此外还有一些个人特征变量。我使用的是 R。
SubjectID Entry Stage1 Stage2 Stage3 Stage4 Exit Race Edu
1 1/12/1990 1/12/1990 1/12/1990 4/3/1994 5/5/1994 B M
2 1/17/1991 1/17/1991 3/3/1991 3/18/1992 1/1/1993 W C
3 1/24/1991 1/24/1991 5/6/1994 B M
我想将这些数据转换成长格式的纵向数据集:对每个日期报告一个人所处的阶段,同时报告不随时间变化的特征。我的观察截止日期是1995年1月1日,此后没有任何观测。我已经研究过 reshape 包,但没有找到我需要的功能。
我的数据:
# dput() of the four-officer sample: one row per person, one column per rank
# holding the promotion date as a factor ("" marks a rank never reached).
# `ï..Name` is the Name column as read from a file with a byte-order mark.
structure(
  list(
    `ï..Name` = structure(
      c(2L, 1L, 4L, 3L),
      .Label = c(
        "Ademulegun, Sauel Adesujo", "Bassey, Wellington Umo",
        "Imo, U. O.", "Lawan, Umar"
      ),
      class = "factor"
    ),
    Mons = c(0L, 0L, 0L, 0L),
    Sandhurst = c(0L, 0L, 1L, 0L),
    Entry = structure(
      c(2L, 3L, 1L, 4L),
      .Label = c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955"),
      class = "factor"
    ),
    Second.Lieutenant = structure(
      c(2L, 3L, 1L, 4L),
      .Label = c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955"),
      class = "factor"
    ),
    Lieutenant = structure(
      c(2L, 1L, 4L, 3L),
      .Label = c("12/20/1949", "4/30/1949", "5/3/1958", "8/1/1955"),
      class = "factor"
    ),
    Captain = structure(
      c(2L, 3L, 1L, 4L),
      .Label = c("", "2/7/1951", "3/5/1952", "5/3/1958"),
      class = "factor"
    ),
    Major = structure(
      c(4L, 3L, 1L, 2L),
      .Label = c("", "1/15/1963", "12/27/1958", "6/21/1957"),
      class = "factor"
    ),
    Lieutenant.Colonel = structure(
      c(4L, 3L, 1L, 2L),
      .Label = c("", "1/15/1963", "10/3/1962", "8/30/1962"),
      class = "factor"
    ),
    Colonel = structure(
      c(3L, 2L, 1L, 1L),
      .Label = c("", "10/3/1962", "2/26/1966"),
      class = "factor"
    ),
    Brigadier.General = structure(
      c(3L, 2L, 1L, 1L),
      .Label = c("", "10/3/1962", "2/26/1966"),
      class = "factor"
    ),
    Depature = structure(
      c(2L, 1L, 3L, 4L),
      .Label = c("1/15/1966", "11/1/1966", "5/8/1956", "7/6/1967"),
      class = "factor"
    ),
    ethnicity = structure(
      c(1L, 4L, 3L, 2L),
      .Label = c("Efik", "Igbo", "Kanuri", "Yoruba"),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -4L)
)
我正在寻找这样的东西:
Name Date Mons Sandhurst Ethnicity Rank
Bassey 4/30/1949 0 0 Efik Lieutenant
Bassey 5/1/1949 0 0 Efik Lieutenant
....
Bassey 2/7/1951 0 0 Efik Captain
答案 0 :(得分:0)
data.table解决方案
library(data.table)
样本数据
# Sample data: four officers, one row each. Every rank column holds the
# promotion date as a factor; "" marks a rank the officer never reached.
df <- local({
  # Build a factor directly from its integer codes and level set.
  fct <- function(codes, lv) structure(codes, levels = lv, class = "factor")

  officers <- list(
    Name = fct(
      c(2L, 1L, 4L, 3L),
      c(
        "Ademulegun, Sauel Adesujo", "Bassey, Wellington Umo",
        "Imo, U. O.", "Lawan, Umar"
      )
    ),
    Mons = c(0L, 0L, 0L, 0L),
    Sandhurst = c(0L, 0L, 1L, 0L),
    Entry = fct(
      c(2L, 3L, 1L, 4L),
      c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955")
    ),
    Second.Lieutenant = fct(
      c(2L, 3L, 1L, 4L),
      c("2/6/1953", "4/30/1949", "6/11/1949", "6/4/1955")
    ),
    Lieutenant = fct(
      c(2L, 1L, 4L, 3L),
      c("12/20/1949", "4/30/1949", "5/3/1958", "8/1/1955")
    ),
    Captain = fct(
      c(2L, 3L, 1L, 4L),
      c("", "2/7/1951", "3/5/1952", "5/3/1958")
    ),
    Major = fct(
      c(4L, 3L, 1L, 2L),
      c("", "1/15/1963", "12/27/1958", "6/21/1957")
    ),
    Lieutenant.Colonel = fct(
      c(4L, 3L, 1L, 2L),
      c("", "1/15/1963", "10/3/1962", "8/30/1962")
    ),
    Colonel = fct(
      c(3L, 2L, 1L, 1L),
      c("", "10/3/1962", "2/26/1966")
    ),
    Brigadier.General = fct(
      c(3L, 2L, 1L, 1L),
      c("", "10/3/1962", "2/26/1966")
    ),
    Depature = fct(
      c(2L, 1L, 3L, 4L),
      c("1/15/1966", "11/1/1966", "5/8/1956", "7/6/1967")
    ),
    ethnicity = fct(
      c(1L, 4L, 3L, 2L),
      c("Efik", "Igbo", "Kanuri", "Yoruba")
    )
  )

  structure(officers, class = "data.frame", row.names = c(NA, -4L))
})
使用 data.table 的快速 melt 进行重塑
library(data.table)

# Reshape the wide table to long form: one row per (person, rank) pair, with
# the time-invariant columns carried along as id.vars.
# setDT() converts `df` to a data.table by reference.
#
# Ranks never reached are stored as "" (an ordinary factor level, not NA), so
# na.rm = TRUE alone leaves those rows in; the chained filter drops them.
data.table::melt(
  data = setDT(df),
  id.vars = c("Name", "Mons", "Sandhurst", "ethnicity"),
  value.name = "Date",
  variable.name = "Rank",
  na.rm = TRUE
)[!is.na(Date) & nzchar(as.character(Date))]
结果
# Name Mons Sandhurst ethnicity Rank Date
# 1: Bassey, Wellington Umo 0 0 Efik Entry 4/30/1949
# 2: Ademulegun, Sauel Adesujo 0 0 Yoruba Entry 6/11/1949
# 3: Lawan, Umar 0 1 Kanuri Entry 2/6/1953
# 4: Imo, U. O. 0 0 Igbo Entry 6/4/1955
# 5: Bassey, Wellington Umo 0 0 Efik Second.Lieutenant 4/30/1949
# 6: Ademulegun, Sauel Adesujo 0 0 Yoruba Second.Lieutenant 6/11/1949
# 7: Lawan, Umar 0 1 Kanuri Second.Lieutenant 2/6/1953
# 8: Imo, U. O. 0 0 Igbo Second.Lieutenant 6/4/1955
# 9: Bassey, Wellington Umo 0 0 Efik Lieutenant 4/30/1949
根据需要重新排序...
注释:
如果希望“空”日期从 melt 后的数据中消失,请确保源数据中的空白日期被读入为 NA;这样 melt 函数中的 na.rm = TRUE 就会把这些行删除。