我的(样本)数据如下所示:
# Sample data: 14 observations of six integer columns. Column names are kept
# exactly as given in the question.
mydata <- data.frame(
  x1 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
  x2 = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 4L),
  x3 = c(1L, 3L, 5L, 1L, 3L, 5L, 1L, 4L, 5L, 2L, 1L, 5L, 6L, 6L),
  week = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 30L, 50L, 22L, 52L, 36L, 25L, 26L),
  newar1 = c(0L, 0L, 2L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L),
  newvar2 = c(0L, 2L, 0L, 0L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)
)
x1 x2 x3 week
0 1 1 0
0 2 3 0
0 3 5 0
0 1 1 0
0 2 3 0
0 3 5 0
1 1 1 1
1 2 4 30
1 3 5 50
1 1 2 22
1 2 1 52
1 3 5 36
1 4 6 25
1 4 6 26
我想创建一个新变量 newvar1:
如果 x1 = 0 => 我想计算整个数据集中 x1 等于 1 的次数(只统计其他行,不包括该观测自身),但只计算具有相同 x2 和 x3 组合、且周数(week)大于 24 的行。
如果 x1 = 1 => 我想计算整个数据集中 x1 等于 1 的次数,但只计算具有相同 x2 和 x3 组合、且周数减去 25 大于零(即 (week - 25) > 0)的行。
这里所说的 "sum",是指在条件成立的情况下 x1 等于 1 的次数。
这里所说的 "如果",是指我只想在满足其后所列条件的情况下对 x1 求和。基本上我的问题是:怎样才能根据条件对某些值求和?
我的数据应如下所示:
x1 x2 x3 week newvar1
0 1 1 0 0
0 2 3 0 0
0 3 5 0 2
0 1 1 0 0
0 2 3 0 0
0 3 5 0 2
1 1 1 1 0
1 2 4 30 0
1 3 5 50 1
1 1 2 22 0
1 2 1 52 0
1 3 5 36 0
1 4 6 25 0
1 4 6 26 1
目前我有以下代码,但它并没有同时考虑 "x2、x3 组合相同" 和周数(week)这两个约束。有什么建议吗?
# data.table syntax (assumes mydata has been converted to a data.table):
# add newvar1 by reference as the sum of x1 within each (x2, x3) group.
mydata[, newvar1 := sum(x1), by = .(x2, x3)]
答案 0 :(得分:0)
我认为我们可以使用 for 循环来完成此任务:
# For every row, count the *other* rows that have x1 == 1 and the same
# (x2, x3) combination as the current row, subject to a week threshold that
# depends on the current row's own x1:
#   x1 == 0  ->  week > 24
#   x1 == 1  ->  week > 25
# The counts are built as one integer vector and assigned as a whole column,
# so the code works whether or not mydata already has a newvar1 column and
# is safe for a zero-row data frame.
mydata$newvar1 <- vapply(seq_len(nrow(mydata)), function(i) {
  week_cutoff <- if (mydata$x1[i] == 0) 24 else 25
  is_match <- mydata$x1 == 1 &              # count where x1 == 1
    mydata$x2 == mydata$x2[i] &             # and where x2 and x3 both match
    mydata$x3 == mydata$x3[i] &
    mydata$week > week_cutoff               # and week is above the cutoff
  is_match[i] <- FALSE                      # never count the row itself
  sum(is_match)
}, integer(1))
# mydata
# x1 x2 x3 week newvar1
# 1 0 1 1 0 0
# 2 0 2 3 0 0
# 3 0 3 5 0 2
# 4 0 1 1 0 0
# 5 0 2 3 0 0
# 6 0 3 5 0 2
# 7 1 1 1 1 0
# 8 1 2 4 30 0
# 9 1 3 5 50 1
# 10 1 1 2 22 0
# 11 1 2 1 52 0
# 12 1 3 5 36 1
# 13 1 4 6 25 1
# 14 1 4 6 26 0
或者,如果对于 x1 == 1 的行,您希望与所有行(包括其自身)的数据进行比较:
# Variant: for every row, count the rows that have x1 == 1 and the same
# (x2, x3) combination as the current row, subject to a week threshold that
# depends on the current row's own x1:
#   x1 == 0  ->  week > 24 (other rows only)
#   x1 == 1  ->  week > 25 (all rows, the current row included)
# A row with x1 == 0 can never satisfy `x1 == 1`, so it is automatically
# excluded from its own count and no explicit self-exclusion is needed.
# The counts are built as one integer vector and assigned as a whole column,
# so the code works whether or not mydata already has a newvar1 column and
# is safe for a zero-row data frame.
mydata$newvar1 <- vapply(seq_len(nrow(mydata)), function(i) {
  week_cutoff <- if (mydata$x1[i] == 0) 24 else 25
  sum(
    mydata$x1 == 1 &                        # count where x1 == 1
      mydata$x2 == mydata$x2[i] &           # and where x2 and x3 both match
      mydata$x3 == mydata$x3[i] &
      mydata$week > week_cutoff             # and week is above the cutoff
  )
}, integer(1))
# mydata
# x1 x2 x3 week newvar1
# 1 0 1 1 0 0
# 2 0 2 3 0 0
# 3 0 3 5 0 2
# 4 0 1 1 0 0
# 5 0 2 3 0 0
# 6 0 3 5 0 2
# 7 1 1 1 1 0
# 8 1 2 4 30 1
# 9 1 3 5 50 2
# 10 1 1 2 22 0
# 11 1 2 1 52 1
# 12 1 3 5 36 2
# 13 1 4 6 25 1
# 14 1 4 6 26 1
答案 1 :(得分:0)
# Two-pass computation of newvar1 using per-row counts of x1 within the rows
# that share the current row's (x2, x3) combination.
row_ids <- seq_len(nrow(mydata))

# Pass 1 - rows with x1 == 0: sum x1 over matching rows with week > 24 (the
# threshold stated in the question). Rows with x1 == 0 add nothing to the sum,
# so the current row never counts itself. All other rows start at 0.
zero_counts <- vapply(row_ids, function(i) {
  with(mydata, sum(x1[week > 24 & x2 == x2[i] & x3 == x3[i]]))
}, numeric(1))
mydata$newvar1 <- ifelse(mydata$x1 == 0, zero_counts, 0)

# Pass 2 - rows with x1 == 1: sum x1 over matching rows whose week is strictly
# smaller than the current row's week (which also excludes the row itself).
# NOTE(review): given `week < week[i]`, the term `week - week[i] < 25` is
# always TRUE; it may have been meant as `week[i] - week < 25` - confirm.
one_counts <- vapply(row_ids, function(i) {
  with(
    mydata,
    sum(x1[week < week[i] & week[i] != 0 & week - week[i] < 25 &
             x2 == x2[i] & x3 == x3[i]])
  )
}, numeric(1))
mydata$newvar1 <- ifelse(mydata$x1 == 1, one_counts, mydata$newvar1)
答案 2 :(得分:0)
使用 dplyr:
library(dplyr)
# Within each (x2, x3) group, count the rows where x1 * week exceeds the
# threshold (x1 * week is 0 whenever x1 == 0, so only x1 == 1 rows can count).
# Rows with x1 == 0 use threshold 24; all other rows use threshold 25 and
# subtract 1 when the row itself would be included in the count.
mydata %>%
  group_by(x2, x3) %>%
  mutate(
    newvar1 = ifelse(
      x1 != 0,
      sum(x1 * week > 25) - (week > 25) * (x1 == 1),
      sum(x1 * week > 24)
    )
  )
# Source: local data frame [14 x 6]
# Groups: x2, x3 [7]
#
# x1 x2 x3 week newvar2 newvar1
# <int> <int> <int> <int> <int> <int>
# 1 0 1 1 0 0 0
# 2 0 2 3 0 2 0
# 3 0 3 5 0 0 2
# 4 0 1 1 0 0 0
# 5 0 2 3 0 2 0
# 6 0 3 5 0 0 2
# 7 1 1 1 1 0 0
# 8 1 2 4 30 0 0
# 9 1 3 5 50 1 1
# 10 1 1 2 22 0 0
# 11 1 2 1 52 0 0
# 12 1 3 5 36 0 1
# 13 1 4 6 25 0 1
# 14 1 4 6 26 0 0
else 分支中那个看起来有些奇怪的部分 `- (week > 25) * (x1 == 1)`,作用是在该行可能与自身匹配时从计数中减去 1(即排除自身)。