符合条件的连续年份的返回值

时间:2016-07-31 09:15:27

标签: r

这是对此问题的跟进:Return values in sucessive years

@akrun让我发一个新问题

我的数据样本可以在这里找到: https://dl.dropboxusercontent.com/u/5699030/SuccessiveYears.xlsx

或者它在数据框中

id <- c(1,1,1,2,3,4,4,5,5,5,6,7,7,8,8,8,8,9,9,9,10,10,11,12,12,12,13,13,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15)
Year <- c(2010,2011,2012,2010,2013,2012,2013,2010,2011,2012,2013,2011,2013,2011,2012,2014,2015,2010,2011,2013,2010,2012,2013,2010,2011,2013,2011,2012,2011,2012,2013,2014,2015,2010,2010,2011,2011,2012,2012,2013,2013,2014,2014,2015,2015)
Class <- c("c","c","c","c","a","c","b","c","c","c","b","c","b","d","f","f","f","c","c","c","c","c","b","c","c","b","c","c","c","c","b","c","b","e","d","e","d","e","d","e","d","e","d","e","d")
ValueA <- c(0.1,0.12,0.15,0.21,0.09,0,0.04,0.05,0.27,0.1,0,0,0,0.22,0.18,0,0.11,0,0,0.06,0,0,0,0.19,0.23,0.18,0,0.08,0,0,0.02,0.08,0.09,0.51,0.99,0.71,0.87,0.97,0.95,1.38,0.84,0.53,1.05,1.04,0.77)
ValueB <- c(0.42,0.58,0.38,0.26,0.28,0.28,0.27,0.22,0.27,0.2,0.11,0,0.05,0.34,0.27,0,0.11,0,0,0.06,0,0,0,0.38,0.31,0.35,0.15,0.17,0.13,0.04,0.07,0.08,0.28,0.92,1.34,0.98,1.18,1.06,1.29,1.74,1.15,0.75,1.49,1.33,1.07)
ValueC <- c(1.3,0.81,1.07,1.16,3.04,0.28,0.18,1.09,1.42,1.19,0.11,0.2,0.45,1.79,1.08,0.59,0.66,0.35,0.3,0.17,0.44,0.57,0.65,2.32,2.45,2.01,3.24,2.45,0.73,0.5,0.4,0.56,0.56,5.96,5.88,5.27,5.01,5.44,6.09,5.5,6.08,3.92,4.98,5.7,5.12)
df <- data.frame(id, Year, Class, ValueA, ValueB, ValueC)
df

对于id和class的每个组合,其中有连续两年的值,我希望ValueA,ValueB和ValueC连续两年连续返回。 后续年份值的所有列标题都应以“Y2”为前缀。

每个id在任何一年中都可以包含多个类的值,因此我们需要查看id和class的组合。

输出应如下所示:

id <- c(1,1,5,5,8,9,13,14,15,15,15,15,15,15,15,15,15,15)
Year <- c(2010,2011,2010,2011,2014,2010,2011,2011,2010,2010,2011,2011,2012,2012,2013,2013,2014,2014)
Class <- c("c","c","c","c","f","c","c","c","e","d","e","d","e","d","e","d","e","d")
ValueA <- c(0.1,0.12,0.05,0.27,0,0,0,0,0.51,0.99,0.71,0.87,0.97,0.95,1.38,0.84,0.53,1.05)
ValueB <- c(0.42,0.58,0.22,0.27,0,0,0.15,0.13,0.92,1.34,0.98,1.18,1.06,1.29,1.74,1.15,0.75,1.49)
ValueC <- c(1.3,0.81,1.09,1.42,0.59,0.35,3.24,0.73,5.96,5.88,5.27,5.01,5.44,6.09,5.5,6.08,3.92,4.98)
Y2Year <- c(2011,2012,2011,2012,2015,2011,2012,2012,2011,2011,2012,2012,2013,2013,2014,2014,2015,2015)
Y2Class <- c("c","c","c","c","f","c","c","c","e","d","e","d","e","d","e","d","e","d")
Y2ValueA <- c(0.12,0.15,0.27,0.1,0.11,0,0.08,0,0.71,0.87,0.97,0.95,1.38,0.84,0.53,1.05,1.04,0.77)
Y2ValueB <- c(0.58,0.38,0.27,0.2,0.11,0,0.17,0.04,0.98,1.18,1.06,1.29,1.74,1.15,0.75,1.49,1.33,1.07)
Y2ValueC <- c(0.81,1.07,1.42,1.19,0.66,0.3,2.45,0.5,5.27,5.01,5.44,6.09,5.5,6.08,3.92,4.98,5.7,5.12)
df1 <- data.frame(id, Year, Class, ValueA, ValueB, ValueC, Y2Year, Y2Class, Y2ValueA, Y2ValueB, Y2ValueC)
df1

1 个答案:

答案 0 :(得分:0)

首先,我使用dlply将数据集拆分为由id和Class标识的组:

library(plyr)
grouped <- dlply(df, .(id, Class), function(x) return(x[order(x$Year), ]))

然后,应用连续年份逻辑,使用merge连接连续年份的数据,并将结果作为list返回:

out <- lapply(grouped, function(x) {
    if (nrow(x)>1) {
        out <- lapply(1:(nrow(x)-1), function(i) {
            if ((x$Year[i]+1)==(x$Year[i+1])) {
                merge(x[i,], x[i+1,], by=c("id", "Class"))
            } else {
                NULL
            }
        })      
    }
})

最后,将结果绑定到数据帧中:

final <- do.call(rbind, lapply(out, function(x) {
    if(length(x)>=1) {
        do.call(rbind, x)   
    } else {
        x
    }
}))

通过重命名列完成。

cols <- c("Year", paste0("Value", LETTERS[1:3]))
names(final) <- c("id", "Class", cols, paste0("Y2", cols))
rownames(final) <- 1:nrow(final)

结果:

> final
   id Class Year ValueA ValueB ValueC Y2Year Y2ValueA Y2ValueB Y2ValueC
1   1     c 2010   0.10   0.42   1.30   2011     0.12     0.58     0.81
2   1     c 2011   0.12   0.58   0.81   2012     0.15     0.38     1.07
3   5     c 2010   0.05   0.22   1.09   2011     0.27     0.27     1.42
4   5     c 2011   0.27   0.27   1.42   2012     0.10     0.20     1.19
5   8     f 2014   0.00   0.00   0.59   2015     0.11     0.11     0.66
6   9     c 2010   0.00   0.00   0.35   2011     0.00     0.00     0.30
7  12     c 2010   0.19   0.38   2.32   2011     0.23     0.31     2.45
8  13     c 2011   0.00   0.15   3.24   2012     0.08     0.17     2.45
9  14     c 2011   0.00   0.13   0.73   2012     0.00     0.04     0.50
10 15     d 2010   0.99   1.34   5.88   2011     0.87     1.18     5.01
11 15     d 2011   0.87   1.18   5.01   2012     0.95     1.29     6.09
12 15     d 2012   0.95   1.29   6.09   2013     0.84     1.15     6.08
13 15     d 2013   0.84   1.15   6.08   2014     1.05     1.49     4.98
14 15     d 2014   1.05   1.49   4.98   2015     0.77     1.07     5.12
15 15     e 2010   0.51   0.92   5.96   2011     0.71     0.98     5.27
16 15     e 2011   0.71   0.98   5.27   2012     0.97     1.06     5.44
17 15     e 2012   0.97   1.06   5.44   2013     1.38     1.74     5.50
18 15     e 2013   1.38   1.74   5.50   2014     0.53     0.75     3.92
19 15     e 2014   0.53   0.75   3.92   2015     1.04     1.33     5.70

我最后还有一行而不是你的输出。也许你错过了一排?