我有一个如下所示的数据框。对于每个 GVKEY,我想计算同一个 AU 按年份连续出现的次数(即该 AU 的任期):一旦 AU 发生变化,计数就从 1 重新开始。下面的示例数据框中,TENURE 列给出了期望的结果:
GVKEY FYEAR AU TENURE
1 1012 2000 1 1
2 1012 2001 1 2
3 1012 2002 1 3
4 1012 2003 1 4
5 1012 2004 2 1
6 1012 2005 2 2
7 1012 2006 1 1
8 1012 2007 2 1
9 1012 2008 2 2
10 1012 2009 2 3
请问该如何实现?我一直没能解决这个问题,任何建议都欢迎。谢谢。
> dput(head(pp,200))
structure(list(GVKEY = c(1001L, 1001L, 1003L, 1003L, 1003L, 1003L,
1003L, 1003L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L,
1004L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L,
1004L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L, 1004L,
1004L, 1004L, 1004L, 1004L, 1007L, 1007L, 1008L, 1008L, 1009L,
1009L, 1009L, 1009L, 1009L, 1009L, 1009L, 1009L, 1009L, 1009L,
1009L, 1010L, 1010L, 1010L, 1010L, 1010L, 1010L, 1010L, 1010L,
1010L, 1010L, 1010L, 1010L, 1010L, 1010L, 1010L, 1010L, 1010L,
1010L, 1010L, 1010L, 1011L, 1011L, 1011L, 1011L, 1011L, 1011L,
1011L, 1011L, 1011L, 1011L, 1011L, 1012L, 1012L, 1012L, 1012L,
1012L, 1012L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L, 1015L, 1015L, 1015L, 1015L, 1016L, 1016L, 1016L,
1016L, 1017L, 1017L, 1017L, 1017L, 1017L, 1017L, 1017L, 1017L,
1017L, 1017L, 1017L, 1018L, 1018L, 1018L, 1018L, 1019L, 1019L,
1019L, 1019L, 1019L, 1019L, 1019L, 1019L, 1019L, 1019L, 1019L,
1019L, 1019L, 1019L, 1019L, 1019L, 1019L, 1019L, 1020L, 1020L,
1020L, 1020L, 1020L, 1020L, 1021L, 1021L, 1021L, 1021L, 1021L,
1021L, 1021L, 1021L, 1021L, 1021L, 1021L, 1021L, 1021L, 1021L,
1021L, 1021L, 1021L, 1021L, 1021L, 1021L, 1021L, 1021L, 1021L,
1021L, 1021L, 1023L, 1023L, 1023L, 1023L, 1025L, 1025L, 1025L,
1025L, 1025L, 1025L, 1025L, 1025L), FYEAR = c(1984L, 1985L, 1984L,
1985L, 1986L, 1987L, 1988L, 1989L, 1984L, 1985L, 1986L, 1987L,
1988L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L,
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L,
2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 2012L, 1984L, 1985L,
1984L, 1985L, 1984L, 1985L, 1986L, 1987L, 1988L, 1989L, 1990L,
1991L, 1992L, 1993L, 1994L, 1984L, 1985L, 1986L, 1987L, 1988L,
1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L, 1997L,
1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 1984L, 1985L, 1986L,
1987L, 1988L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1984L,
1985L, 1986L, 1987L, 1988L, 1989L, 1984L, 1985L, 1986L, 1987L,
1988L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1996L,
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L,
2006L, 2007L, 2008L, 2009L, 2010L, 1984L, 1985L, 1986L, 1987L,
1984L, 1985L, 1986L, 1987L, 1984L, 1985L, 1986L, 1987L, 1988L,
1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1984L, 1985L, 1986L,
1987L, 1984L, 1985L, 1986L, 1987L, 1988L, 1989L, 1990L, 1991L,
1992L, 1993L, 1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L,
2001L, 1984L, 1985L, 1986L, 1987L, 1988L, 1989L, 1984L, 1985L,
1986L, 1987L, 1988L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L,
1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L,
2004L, 2005L, 2006L, 2007L, 2008L, 1984L, 1985L, 1986L, 1987L,
1984L, 1985L, 1986L, 1987L, 1988L, 1989L, 1990L, 1991L), AU = c(6,
6, 8, 8, 8, 8, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, -1, 5, 5, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 9, 9, 20, 20, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 9, 9, 6, 6, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 9, 9, 2, 2, 2,
2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 21, 21, 9, 9, 9, 6, 6, 6, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 9, 9, 9, 9,
21, 3, 3, 3, 3, 9, 9, 3, 9, 11, 11, 11, 11)), .Names = c("GVKEY",
"FYEAR", "AU"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L,
61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L,
74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L,
87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L,
100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L,
111L, 112L, 113L, 114L, 115L, 116L, 126L, 127L, 128L, 129L, 130L,
131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L,
142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 152L,
153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 163L,
164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L,
175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 184L, 185L,
186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L, 195L, 196L,
197L, 198L, 199L, 200L, 201L, 202L, 203L, 204L, 205L, 206L, 207L,
208L, 209L), class = "data.frame")
答案 0 :(得分:4)
使用 `sequence` 和 `rle`:
library(data.table)

# Convert `pp` to a data.table; `:=` below then adds the column by reference.
DT <- as.data.table(pp)

# Within each GVKEY, rle() collapses AU into runs of identical adjacent values
# and sequence() expands every run length k into 1, 2, ..., k, so the counter
# restarts at 1 whenever AU changes.
# `$lengths` is spelled out in full: `$length` only worked through partial
# matching of list component names.
# NOTE(review): "consecutive" means adjacent rows here; this presumes rows are
# already ordered by FYEAR within each GVKEY -- confirm for the full data set.
DT[, TENURE := sequence(rle(AU)$lengths), by = GVKEY]
# GVKEY FYEAR AU TENURE
# 1: 1001 1984 6 1
# 2: 1001 1985 6 2
# 3: 1003 1984 8 1
# 4: 1003 1985 8 2
# 5: 1003 1986 8 3
# ---
#196: 1025 1987 9 1
#197: 1025 1988 11 1
#198: 1025 1989 11 2
#199: 1025 1990 11 3
#200: 1025 1991 11 4
答案 1 :(得分:-1)
尝试:
# Tenure = position of each row inside its run of identical adjacent AU values,
# computed separately for every GVKEY.
# Grouping must be by GVKEY, not by AU: grouping by the AU value itself ignores
# the firm and never restarts the count when an AU reappears after a gap.
# rle() yields the run lengths; sequence() expands each length k to 1..k.
# NOTE(review): presumes rows are ordered by FYEAR within each GVKEY -- confirm.
df$TENURE <- ave(
  df$AU, df$GVKEY,
  FUN = function(au) sequence(rle(au)$lengths)
)