最好用一个例子来解释。
我有一个名为data.frame
的{{1}}的向量或列:
vec
我想要一个矢量化过程(不是vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
循环),以便在观察到for
时更改三个尾随NA
。
结束向量是:
1
如果我们有:
c(NA, NA, 1, 1, 1, 1, NA, 1, 1, 1, 1, NA, NA, NA)
结束向量看起来像:
vec <- c(NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA)
一个写得非常糟糕的解决方案是:
c(NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA)
答案 0 :(得分:17)
另一种选择:
`[<-`(vec,c(outer(which(vec==1),1:3,"+")),1)
# [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
虽然以上内容适用于示例,但如果在最后位置找到1,则会延长vec
的长度。最好做一个简单的检查并包装成一个函数:
threeNAs<-function(vec) {
ind<-c(outer(which(vec==1),1:3,"+"))
ind<-ind[ind<=length(vec)]
`[<-`(vec,ind,1)
}
答案 1 :(得分:13)
另一个快速解决方案:
vec[rep(which(vec == 1), each = 3) + c(1:3)] <- 1
给出:
> vec [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
基准测试仅适用于较大的数据集。一个带有10k大向量的基准测试和几个已发布的解决方案:
library(microbenchmark)
microbenchmark(ans.jaap = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[rep(which(vec == 1), each = 3) + c(1:3)] <- 1},
ans.989 = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
r <- which(vec==1);
vec[c(mapply(seq, r, r+3))] <- 1},
ans.sotos = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[unique(as.vector(t(sapply(which(vec == 1), function(i) seq(i+1, length.out = 3)))))] <- 1},
ans.gregor = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[is.na(vec)] <- 0;
n <- length(vec);
vec <- vec + c(0, vec[1:(n-1)]) + c(0, 0, vec[1:(n-2)]) + c(0, 0, 0, vec[1:(n-3)]);
vec[vec == 0] <- NA},
ans.moody = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
output <- sapply(1:length(vec),function(i){any(!is.na(vec[max(0,i-3):i]))});
output[output] <- 1;
output[output==0] <- NA},
ans.nicola = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
`[<-`(vec,c(outer(which(vec==1),1:3,"+")),1)})
给出了以下基准:
Unit: microseconds expr min lq mean median uq max neval cld ans.jaap 1778.905 1937.414 3064.686 2100.595 2257.695 86233.593 100 a ans.989 87688.166 89638.133 96992.231 90986.269 93326.393 182431.366 100 c ans.sotos 125344.157 127968.113 132386.664 130117.438 132951.380 214460.174 100 d ans.gregor 4036.642 5824.474 10861.373 6533.791 7654.587 87806.955 100 b ans.moody 173146.810 178369.220 183698.670 180318.799 184000.062 264892.878 100 e ans.nicola 966.927 1390.486 1723.395 1604.037 1904.695 3310.203 100 a
答案 2 :(得分:5)
究竟是什么&#39; vectorised&#39;,如果不是用C语言编写的循环?
这是一个基准测试很好的C ++循环。
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
library(Rcpp)
cppFunction('NumericVector fixVec(NumericVector myVec){
int n = myVec.size();
int foundCount = 0;
for(int i = 0; i < n; i++){
if(myVec[i] == 1) foundCount = 1;
if(ISNA(myVec[i])){
if(foundCount >= 1 & foundCount <= 3){
myVec[i] = 1;
foundCount++;
}
}
}
return myVec;
}')
fixVec(vec)
# [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
基准
library(microbenchmark)
microbenchmark(
ans.jaap = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[rep(which(vec == 1), each = 4) + c(0:3)] <- 1
},
ans.nicola = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
`[<-`(vec,c(outer(which(vec==1),0:3,"+")),1)
},
ans.symbolix = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec <- fixVec(vec)
}
)
# Unit: microseconds
# expr min lq mean median uq max neval
# ans.jaap 2017.789 2264.318 2905.2437 2579.315 3588.4850 4667.249 100
# ans.nicola 1242.002 1626.704 3839.4768 2095.311 3066.4795 81299.962 100
# ans.symbolix 504.577 533.426 838.5661 718.275 966.9245 2354.373 100
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4)
vec <- fixVec(vec)
vec2 <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4)
vec2[rep(which(vec2 == 1), each = 4) + c(0:3)] <- 1
identical(vec, vec2)
# [1] TRUE
答案 3 :(得分:3)
以下代码执行您要求的操作。它涉及&#34;转移&#34;向量然后添加移位版本
vec[is.na(vec)] <- 0
n <- length(vec)
vec <- vec + c(0, vec[1:(n-1)]) + c(0, 0, vec[1:(n-2)]) + c(0, 0, 0, vec[1:(n-3)])
vec[vec == 0] <- NA
vec[vec != 0] <- 1
# vec | 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0 ,0, 0
# c(0, vec[1:(n-1)]) | + 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0 ,0, 0
# c(0, 0, vec[1:(n-2)]) | + 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0 ,0
# c(0,0,0,vec[1:(n-3)]) | + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0
# |-------------------------------------------
# | 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0
答案 4 :(得分:2)
非矢量化解决方案,但是,使用基础R的另一个选项,
vec[unique(as.vector(t(sapply(which(vec == 1), function(i) seq(i+1, length.out = 3)))))] <- 1
vec
#[1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
vec1[unique(as.vector(t(sapply(which(vec1 == 1), function(i) seq(i+1, length.out = 3)))))] <- 1
vec1
#[1] NA NA 1 1 1 1 1 1 1 1 1 NA NA NA
答案 5 :(得分:0)
使用sapply
,any
和is.na
:
output <- sapply(1:length(vec),function(i){any(!is.na(vec[max(0,i-3):i]))})
output[output] <- 1
output[output==0] <- NA
答案 6 :(得分:0)
这个怎么样:
r <- which(vec==1)
vec[c(mapply(seq, r, r+3))] <- 1
示例:
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
#[1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
vec <- c(NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA)
#[1] NA NA 1 1 1 1 1 1 1 1 1 NA NA NA