R中的条件重新编码和求和

时间:2016-08-17 15:22:15

标签: r data-manipulation recode

我的(样本)数据如下所示:

# Sample data: 14 rows of integer columns. x1 is a 0/1 flag, x2 and x3 form
# the combination used for matching rows, and week is the week number.
# newar1 (spelled exactly as in the original post) and newvar2 are kept
# unchanged; newar1 holds the same values as the expected newvar1 column
# shown further below.
mydata <- data.frame(
  x1 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
  x2 = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 4L),
  x3 = c(1L, 3L, 5L, 1L, 3L, 5L, 1L, 4L, 5L, 2L, 1L, 5L, 6L, 6L),
  week = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 30L, 50L, 22L, 52L, 36L, 25L, 26L),
  newar1 = c(0L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  newvar2 = c(0L, 2L, 0L, 0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)
)



x1  x2  x3  week
0   1   1   0
0   2   3   0
0   3   5   0
0   1   1   0
0   2   3   0
0   3   5   0
1   1   1   1
1   2   4   30
1   3   5   50
1   1   2   22
1   2   1   52
1   3   5   36
1   4   6   25
1   4   6   26

我想创建一个新变量newvar1

  • 如果 x1 = 0 => 我想计算整个数据集中 x1 等于 1 的次数(只统计其他行,不包括当前行本身),但只计算与当前行具有相同 x2 和 x3 组合、且周数大于 24 的行。

  • 如果 x1 = 1 => 我想计算整个数据集中 x1 等于 1 的次数,但只计算与当前行具有相同 x2 和 x3 组合、且周数减去 25 大于零(即 (week - 25) > 0)的行。

这里所说的 "sum",是指在条件成立时 x1 等于 1 的次数。

这里所说的 "如果"(if),是指我只想对满足 if 之后条件的行求 x1 的和。基本上我的问题是:怎样才能根据条件对某些值求和?

我的数据应如下所示:

x1  x2  x3  week newvar1
0   1   1   0    0       
0   2   3   0    0       
0   3   5   0    2       
0   1   1   0    0       
0   2   3   0    0       
0   3   5   0    2       
1   1   1   1    0       
1   2   4   30   0       
1   3   5   50   1       
1   1   2   22   0       
1   2   1   52   0       
1   3   5   36   0       
1   4   6   25   0       
1   4   6   26   1       

目前我有以下代码,它按 x2 和 x3 分组求和,但没有排除当前行本身,也没有考虑周数的约束。有什么建议吗?

# Asker's attempt: assign to newvar1 the sum of x1 within each (x2, x3) group.
# No week condition is applied and the current row is not excluded.
# NOTE(review): `:=` with `by=` inside `[` is data.table syntax; presumably
# mydata has been converted to a data.table beforehand -- confirm.
mydata[,newvar1:=sum(x1), by=list(x2,x3)]

3 个答案:

答案 0 :(得分:0)

我认为我们可以使用for循环来完成此任务:

# For every row, count the OTHER rows (the row itself is always excluded) that
# have x1 == 1, the same (x2, x3) combination, and a week above a threshold.
# The threshold is 24 when the current row has x1 == 0 and 25 when it has
# x1 == 1.

# Create the result column up front so each iteration assigns one element of
# an existing column instead of writing through a temporary row copy.
mydata$newvar1 <- integer(nrow(mydata))

# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(mydata) would iterate over c(1, 0).
for (i in seq_len(nrow(mydata))) {
  week_threshold <- if (mydata$x1[i] == 0) 24 else 25
  others <- mydata[-i, ] # every row except the current one

  mydata$newvar1[i] <- sum(
    others$x1 == 1 &                    # count where x1 == 1
      others$x2 == mydata$x2[i] &       # same x2 as the current row
      others$x3 == mydata$x3[i] &       # same x3 as the current row
      others$week > week_threshold      # week above the threshold
  )
}

# mydata
#    x1 x2 x3 week newvar1
# 1   0  1  1    0       0
# 2   0  2  3    0       0
# 3   0  3  5    0       2
# 4   0  1  1    0       0
# 5   0  2  3    0       0
# 6   0  3  5    0       2
# 7   1  1  1    1       0
# 8   1  2  4   30       0
# 9   1  3  5   50       1
# 10  1  1  2   22       0
# 11  1  2  1   52       0
# 12  1  3  5   36       1
# 13  1  4  6   25       1
# 14  1  4  6   26       0

或者,对于x1 == 1,您希望比较所有行的数据:

# Variant: rows with x1 == 0 are compared against all OTHER rows (week > 24),
# while rows with x1 == 1 are compared against ALL rows, including themselves
# (week > 25). In both cases only rows with x1 == 1 and the same (x2, x3)
# combination are counted.

# Create the result column up front so each iteration assigns one element of
# an existing column instead of writing through a temporary row copy.
mydata$newvar1 <- integer(nrow(mydata))

# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(mydata) would iterate over c(1, 0).
for (i in seq_len(nrow(mydata))) {
  # Rows with x1 == 1 that share the current row's (x2, x3) combination.
  candidate <- mydata$x1 == 1 &
    mydata$x2 == mydata$x2[i] &
    mydata$x3 == mydata$x3[i]

  if (mydata$x1[i] == 0) {
    # Exclude the current row and require week > 24.
    mydata$newvar1[i] <- sum(candidate[-i] & mydata$week[-i] > 24)
  } else {
    # Keep the current row in the comparison and require week > 25.
    mydata$newvar1[i] <- sum(candidate & mydata$week > 25)
  }
}

# mydata
#    x1 x2 x3 week newvar1
# 1   0  1  1    0       0
# 2   0  2  3    0       0
# 3   0  3  5    0       2
# 4   0  1  1    0       0
# 5   0  2  3    0       0
# 6   0  3  5    0       2
# 7   1  1  1    1       0
# 8   1  2  4   30       1
# 9   1  3  5   50       2
# 10  1  1  2   22       0
# 11  1  2  1   52       1
# 12  1  3  5   36       2
# 13  1  4  6   25       1
# 14  1  4  6   26       1

答案 1 :(得分:0)

# Step 1 -- rows with x1 == 0: sum x1 over all rows that share the row's
# (x2, x3) combination and have week > 25; every other row gets 0.
mydata$newvar1 <- ifelse(
  mydata$x1 == 0,
  sapply(
    seq_len(nrow(mydata)),
    function(k) with(mydata, sum(x1[week > 25 & x2 == x2[k] & x3 == x3[k]]))
  ),
  0
)

# Step 2 -- rows with x1 == 1: sum x1 over rows in the same (x2, x3)
# combination whose week is strictly smaller than the current row's week,
# provided the current week is non-zero and the week difference is below 25.
# Rows with x1 != 1 keep the value computed in step 1.
mydata$newvar1 <- ifelse(
  mydata$x1 == 1,
  sapply(
    seq_len(nrow(mydata)),
    function(k) {
      with(
        mydata,
        sum(x1[week < week[k] & week[k] != 0 & week - week[k] < 25 &
                 x2 == x2[k] & x3 == x3[k]])
      )
    }
  ),
  mydata$newvar1
)

答案 2 :(得分:0)

使用dplyr

library(dplyr)
# Within each (x2, x3) group, count the rows where x1 * week exceeds the
# threshold (24 for x1 == 0 rows, 25 for x1 == 1 rows). Because x1 is 0 or 1,
# x1 * week is 0 for x1 == 0 rows, so only x1 == 1 rows can pass the test.
# For x1 == 1 rows the final term subtracts 1 when the row itself satisfies
# week > 25, so a row is never counted as its own match.
mydata %>%
  group_by(x2, x3) %>%
  mutate(
    newvar1 = ifelse(
      x1 == 0,
      sum((x1 * week) > 24),
      sum((x1 * week) > 25) - (week > 25) * (x1 == 1)
    )
  )
# Source: local data frame [14 x 6]
# Groups: x2, x3 [7]
# 
#       x1    x2    x3  week newvar2 newvar1
#    <int> <int> <int> <int>   <int>   <int>
# 1      0     1     1     0       0       0
# 2      0     2     3     0       2       0
# 3      0     3     5     0       0       2
# 4      0     1     1     0       0       0
# 5      0     2     3     0       2       0
# 6      0     3     5     0       0       2
# 7      1     1     1     1       0       0
# 8      1     2     4    30       0       0
# 9      1     3     5    50       1       1
# 10     1     1     2    22       0       0
# 11     1     2     1    52       0       0
# 12     1     3     5    36       0       1
# 13     1     4     6    25       0       1
# 14     1     4     6    26       0       0

else 分支中看起来有些奇怪的部分 `- (week > 25) * (x1 == 1)`,作用是:当某一行本身也满足条件(会与自身匹配)时减去 1,从而把该行自身排除在计数之外。