
时间:2014-02-28 01:00:33

标签: r dataframe


我的数据框有两个索引列 Case 和 Observation,以及一个变量 Amount。Case 是因子,Observation 是整数,二者共同构成索引:Case = 3 且 Observation = 4 的行对应第三个案例的第 4 次观察,Case = 4 且 Observation = 1 的行对应第四个案例的第 1 次观察。



# Example data: six cases observed over up to three rounds. Cases 5 and 6
# have no third observation, so the last round holds only four rows.
case <- rep(c(1, 2, 3, 4, 5, 6), length.out = 16)
obs <- rep(c(1, 2, 3), times = c(6, 6, 4))
amount <- c(
  0, 2, 12, 1, 0, 20,
  1, 2, 22, 2, 1, 50,
  5, 2, 100, 28
)
# `case` is stored as a factor in the data frame; the free-standing `case`
# vector above stays numeric.
d.example <- data.frame(case = factor(case), obs = obs, amount = amount)
case obs Amount 
1    1   0
2    1   2 
3    1   12
4    1   1
5    1   0 
6    1   20
1    2   1
2    2   2
3    2   22
4    2   2
5    2   1
6    2   50
1    3   5
2    3   2
3    3   100
4    3   28



case obs Amount deltaAmount
1    1   0      1
2    1   2      0
3    1   12     10
4    1   1      1
5    1   0      1
6    1   20     30
1    2   1      4
2    2   2      0
3    2   22     78
4    2   2      26
5    2   1      -1
6    2   50     -1
1    3   5      -1
2    3   2      -1
3    3   100    -1
4    3   28     -1


# Asker's attempt: fill d$deltaAmount with the change in AmountRaised between
# consecutive observations of the same case, using an explicit double loop.
# NOTE(review): `d` is not defined in the visible snippet; presumably it is the
# asker's real data frame with columns Case, Observation and AmountRaised
# (the example above uses d.example / case / obs / amount) -- confirm.
deltaAmount <- NULL
# Every row starts at -1; rows the loop never assigns keep that value.
deltaAmount <- rep(-1, length(d$Case))
d$deltaAmount <- deltaAmount

x <- NULL
y <- NULL

for( i in unique(d$Case)) {   # i is the case index
    x <- NULL
# set x equal to a vector containing all the observations for the ith case except the first observation
    x <- subset( unique(d$Observation[which(d$Case == i)]), unique( d$Observation[which(d$Case == i)]) > 1)

    for( j in x ) { # j is the observation index (starts at 2 to avoid the error that would occur if we subtract a preceding observation from the first observation)

        # Amount at observation j minus amount at observation j-1, stored in y.
        # NOTE(review): each which() returns integer positions, and the two
        # position vectors are then combined with `&`; this is the expression
        # named in the "longer object length" warning reported below. The
        # assignment on the next line instead puts the `&` inside one which().
        d$AmountRaised[which(d$Case == i) & which(d$Observation == j)] - d$AmountRaised[which(d$Case == i) & which(d$Observation == j-1)] -> y
        # The difference is written to the row of the earlier observation (j-1).
        y -> d$deltaAmount[which( d$Case == i & d$Observation == j-1 )] 
        # NOTE(review): the closing braces of both loops are not present in
        # this excerpt.




1: In which(d$Case == i) & which(d$Observation == j) : longer object length is not a multiple of shorter object length






2 个答案:

答案 0 :(得分:3)

这正是 plyr(和 dplyr)所针对的场景——split / apply / combine(拆分/应用/合并)。可以用 diff() 得到相邻行之间的差值。正如评论中指出的,diff() 依赖于行的顺序,因此只有在数据已按正确顺序排列时才有效:


# Per-case forward difference with dplyr: deltaAmount is the next observation's
# amount minus the current one, and NA for the last observation of each case.
#
# `%.%` was dplyr's original chaining operator; it was deprecated and later
# removed, so the chain uses `%>%` instead.
# lead(order_by = obs) pairs each row with the next observation number of the
# same case, so the result does not rely on the physical row order the way
# c(diff(amount), NA) does. Row order of the output is unchanged.
library(dplyr)

d.example %>%
  group_by(case) %>%
  mutate(deltaAmount = lead(amount, order_by = obs) - amount) %>%
  ungroup()

#    case obs amount deltaAmount
# 1     1   1      0           1
# 2     2   1      2           0
# 3     3   1     12          10
# 4     4   1      1           1
# 5     5   1      0           1
# 6     6   1     20          30
# 7     1   2      1           4
# 8     2   2      2           0
# 9     3   2     22          78
# 10    4   2      2          26
# 11    5   2      1          NA
# 12    6   2     50          NA
# 13    1   3      5          NA
# 14    2   3      2          NA
# 15    3   3    100          NA
# 16    4   3     28          NA


# Per-case forward difference with plyr (split / apply / combine).
# diff() subtracts physically adjacent rows, so the input is sorted by case and
# then obs first; without that the differences are only correct when the data
# frame already happens to be in observation order within each case.
# plyr functions are namespaced so `mutate` cannot resolve to dplyr::mutate
# when both packages are attached.
d.sorted <- d.example[order(d.example$case, d.example$obs), ]
d.out <- plyr::ddply(
  d.sorted, "case", plyr::mutate,
  deltaAmount = c(diff(amount), NA)
)
#    case obs amount deltaAmount
# 1     1   1      0           1
# 2     1   2      1           4
# 3     1   3      5          NA
# 4     2   1      2           0
# 5     2   2      2           0
# 6     2   3      2          NA
# 7     3   1     12          10
# 8     3   2     22          78
# 9     3   3    100          NA
# 10    4   1      1           1
# 11    4   2      2          26
# 12    4   3     28          NA
# 13    5   1      0           1
# 14    5   2      1          NA
# 15    6   1     20          30
# 16    6   2     50          NA

答案 1 :(得分:3)


# Per-case forward difference in base R: case.delta is the next observation's
# amount minus the current one, NA for the last observation of each case.
# diff() works on adjacent elements, so the differences are computed on rows
# sorted by case and obs rather than on whatever order the rows arrive in.
row.order <- order(d.example$case, d.example$obs)
sorted.delta <- ave(
  d.example$amount[row.order], d.example$case[row.order],
  FUN = function(x) c(diff(x), NA)
)
# order() of a permutation is its inverse, which maps the sorted results back
# to the original row positions.
d.example$case.delta <- sorted.delta[order(row.order)]

# Display the rows grouped by case with observations in sequence.
d.example[row.order, ]
#    case obs amount case.delta
# 1     1   1      0          1
# 7     1   2      1          4
# 13    1   3      5         NA
# 2     2   1      2          0
# 8     2   2      2          0
# 14    2   3      2         NA
# 3     3   1     12         10
# 9     3   2     22         78
# 15    3   3    100         NA
# 4     4   1      1          1
# 10    4   2      2         26
# 16    4   3     28         NA
# 5     5   1      0          1
# 11    5   2      1         NA
# 6     6   1     20         30
# 12    6   2     50         NA