Question

我在 R 中处理一份跟踪大脑活动随时间变化的数据。列 `mark`（标记）记录了特定处理开始和结束的位置。例如，第一个条件（mark == 1）从第 3 行开始，到第 6 行结束。第二个实验条件（mark == 2）从第 9 行开始，到第 12 行结束。处理 1 在第 15 行到第 18 行之间再次出现。

# Observation ids, one per recorded row.
ob.id <- 1:20
# Marker column: a non-zero entry flags the first and the last row of a
# treatment; the value itself is the condition number.
mark <- c(
  0, 0, 1, 0, 0, 1, 0, 0, 2, 0,
  0, 2, 0, 0, 1, 0, 0, 1, 0, 0
)
# Desired output column: every row from a start marker through its matching
# end marker carries the condition number, all remaining rows are 0.
condition <- c(
  0, 0, 1, 1, 1, 1, 0, 0, 2, 2,
  2, 2, 0, 0, 1, 1, 1, 1, 0, 0
)
# `start` is the input data, `result` is the expected outcome.
start <- data.frame(ob.id = ob.id, mark = mark)
result <- data.frame(ob.id = ob.id, mark = mark, condition = condition)
print(start)
> print (start)
   ob.id mark
1      1    0
2      2    0
3      3    1
4      4    0
5      5    0
6      6    1
7      7    0
8      8    0
9      9    2
10    10    0
11    11    0
12    12    2
13    13    0
14    14    0
15    15    1
16    16    0
17    17    0
18    18    1
19    19    0
20    20    0

我需要新建一列虚拟变量，用来标明每个观测属于哪个实验条件，如下所示：

> print(result)
   ob.id mark condition
1      1    0         0
2      2    0         0
3      3    1         1
4      4    0         1
5      5    0         1
6      6    1         1
7      7    0         0
8      8    0         0
9      9    2         2
10    10    0         2
11    11    0         2
12    12    2         2
13    13    0         0
14    14    0         0
15    15    1         1
16    16    0         1
17    17    0         1
18    18    1         1
19    19    0         0
20    20    0         0

感谢您的帮助！

Answer 1

这是我能想到的一种方式：

# Find where experiments start and stop: the row numbers of every non-zero
# marker, in order (start, stop, start, stop, ...).
ind <- which(result$mark != 0)
# [1]  3  6  9 12 15 18

# Make a matrix of start and stop indices. Dropping the last marker and
# keeping every other element leaves the starts; dropping the first marker
# and doing the same leaves the stops. The mask is sized to the trimmed
# vectors so that an unpaired trailing marker cannot select an out-of-range
# element (which would put an NA into the matrix).
odd <- seq_len(max(length(ind) - 1, 0)) %% 2 == 1
idx <- cbind(head(ind, -1)[odd], tail(ind, -1)[odd])
#      [,1] [,2]
# [1,]    3    6
# [2,]    9   12
# [3,]   15   18

修改

我意识到，直接取向量的奇数位和偶数位元素，就能更简单地构造上述索引矩阵：

# Odd-positioned markers are experiment starts, even-positioned ones are stops.
idx <- cbind(ind[seq_along(ind) %% 2 == 1], ind[seq_along(ind) %% 2 != 1])


#  Make vector of row indices to turn to 1's.
#  Map() + unlist() always yields a flat vector; apply() would return a list
#  as soon as two experiments span a different number of rows, and a list
#  cannot be used as a subscript below.
ones <- unlist(Map(seq, idx[, 1], idx[, 2]))

#  Make your new column and turn appropriate rows to 1
result$condition <- 0
result$condition[ones] <- 1
result
#   ob.id mark condition
#1      1    0         0
#2      2    0         0
#3      3    1         1
#4      4    0         1
#5      5    0         1
#6      6    1         1
#7      7    0         0
#8      8    0         0
#9      9    2         1
#10    10    0         1
#11    11    0         1
#12    12    2         1
#13    13    0         0
#14    14    0         0
#15    15    1         1
#16    16    0         1
#17    17    0         1
#18    18    1         1
#19    19    0         0
#20    20    0         0

修改

@eddi 指出我需要填入实验条件的取值，而不只是 1。所以这里给出另一种策略，它使用了（倒吸一口气！）一个 for 循环。只有当您有~~数百万~~成千上万个实验时，这才会明显拖慢速度（记得预先分配结果向量）：

# Pair up the marker rows. Markers alternate start, stop, start, stop, ...,
# so filling a two-column matrix by row puts each start in column 1 and its
# stop in column 2.
ind <- matrix(which(start$mark != 0), ncol = 2, byrow = TRUE)
# Column 3 holds the condition value, read from the start marker's row.
ind <- cbind(ind, start$mark[ind[, 1]])
#     [,1] [,2] [,3]
#[1,]    3    6    1
#[2,]    9   12    2
#[3,]   15   18    1

# Pre-allocated result vector: 0 everywhere outside an experiment.
res <- integer(nrow(start))

# seq_len() instead of 1:nrow(ind): with no markers at all nrow(ind) is 0 and
# 1:0 would iterate over c(1, 0), failing with a subscript error.
for (i in seq_len(nrow(ind))) {
  res[ind[i, 1]:ind[i, 2]] <- ind[i, 3]
}
# [1] 0 0 1 1 1 1 0 0 2 2 2 2 0 0 1 1 1 1 0 0

Answer 2

这是一个有趣的小问题。我在下面使用的技巧是先对 mark 向量计算 rle（游程编码），这会使问题更简单，因为在得到的 values 向量中，相邻两个非零值之间始终只有一个 0，它可能需要也可能不需要被替换（取决于两侧的值）。

# Example vector with some edge cases (back-to-back markers, long gaps).
v <- c(0, 0, 1, 0, 0, 0, 1, 2, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 2)

# Run-length encode v: every stretch of repeated values becomes one run.
v.rle <- rle(v)
v.rle
#Run Length Encoding
#  lengths: int [1:14] 2 1 3 1 1 2 1 2 1 4 ...
#  values : num [1:14] 0 1 0 1 2 0 2 0 1 0 ...

vals <- v.rle$values

# For every interior run, line up its own value with the values of the runs
# immediately before and after it.
left <- head(vals, -2)
mid <- head(tail(vals, -1), -1)
right <- tail(vals, -2)

# A run of 0's sitting between two runs with the same value lies inside an
# experiment; it takes over the value of the run before it.
idx <- which(mid == 0 & left == right)
vals <- replace(vals, idx + 1, vals[idx])

# Put the patched values back and expand the runs to the original length.
v.rle$values <- vals
inverse.rle(v.rle)
# [1] 0 0 1 1 1 1 1 2 2 2 2 0 0 1 1 1 1 1 1 2 2 2

最省事的做法可能是把上述步骤封装成一个函数，然后把它应用到 data.frame 的相应列上（而不是直接手动操作向量）。

另一种方法基于 @SimonO101 的观察：从起始数据构造出正确的分组（可以单独运行 by 表达式中的各个部分，逐步查看它是如何工作的）：

library(data.table)
dt <- data.table(start)

# Label every row with the mark value of the first row of its group. The
# grouping key computed in `by` puts each experiment (start marker through
# end marker) in one group and each stretch between experiments in another.
dt[, result := mark[1],
     by = {
       # Flag every second marker, i.e. the end marker of each experiment.
       tmp <- rep(0, length(mark))
       tmp[which(mark != 0)[c(FALSE, TRUE)]] <- 1
       # cumsum() opens a new group at every marker; subtracting tmp pulls
       # each end marker back into the group opened by its start marker.
       cumsum(mark != 0) - tmp
     }]
dt
#    ob.id mark result
# 1:     1    0      0
# 2:     2    0      0
# 3:     3    1      1
# 4:     4    0      1
# 5:     5    0      1
# 6:     6    1      1
# 7:     7    0      0
# 8:     8    0      0
# 9:     9    2      2
#10:    10    0      2
#11:    11    0      2
#12:    12    2      2
#13:    13    0      0
#14:    14    0      0
#15:    15    1      1
#16:    16    0      1
#17:    17    0      1
#18:    18    1      1
#19:    19    0      0
#20:    20    0      0

后一种方法可能更灵活。

根据序列中的位置分配新值

2 个答案:

修改

修改