我是R的初学者,这是我的代码:
for (i in 1:7){
testing<-vector(length=(length(yy)-3))
if(all(yy[i:(i+2)]==0))
testing[i]<-1
else
testing[i]<-NA
}
yy
指的是以下长度为10的向量:
> yy
[1] 1 0 0 0 0 1 0 1 0 1
testing
就像一个预测函数输出,如果yy
中的前3个元素都是0,它将预测1。如果不是,它将不会预测任何内容,因此NA。由于yy
总共有10个元素,testing
总共有7个元素(因此长度为7)但是,不是给我1s和NA的输出,而是给出这个:
> testing
[1] FALSE FALSE FALSE FALSE FALSE FALSE NA
我无法弄清楚为什么,一些帮助会很棒。谢谢。
答案 0 :(得分:5)
您应该在循环外定义testing
:
testing<-vector(length=(length(yy)-3))
for (i in 1:7){
if(all(yy[i:(i+2)]==0))
testing[i]<-1
else
testing[i]<-NA
}
testing
[1] NA 1 1 NA NA NA NA
对于此任务,您还可以使用rollapply
中的zoo
:
library(zoo)
rollapply(yy, 3, function(x) ifelse(all(x == 0), 1, NA))
[1] NA 1 1 NA NA NA NA NA
答案 1 :(得分:4)
以下是一些解决此问题的其他矢量化方法
基础r stats::filter
N <- 3
NA^(stats::filter(yy == 0, rep(1, N), sides = 1)[-(1:N-1)] != N)
# [1] NA 1 1 NA NA NA NA NA
data.table::shift
NA^(Reduce(`+`, data.table::shift(yy == 0, 0:(N-1)))[-(1:N-1)] != N)
# [1] NA 1 1 NA NA NA NA NA
RcppRoll::roll_sum
NA^(RcppRoll::roll_sum(yy == 0, N) != N)
# [1] NA 1 1 NA NA NA NA NA
一些根据(我还使用compiler::cmpfun
和两个更有效的zoo
解决方案添加了for循环的编译版本)
ForLoop <- function(yy, N){
testing<-vector(length=(length(yy)-N))
for (i in 1:length(testing)){
if(all(yy[i:(i+(N-1))]==0))
testing[i]<-1
else
testing[i]<-NA
}
testing
}
ForLoopBin <- compiler::cmpfun(ForLoop)
ZOO <- function(yy, N) zoo::rollapply(yy, N, function(x) ifelse(all(x == 0), 1, NA))
ZOO2 <- function(yy, N) NA^!zoo::rollapply(yy == 0, N, all)
ZOO3 <- function(yy, N) NA^(zoo::rollsum(yy == 0, N) != N)
RCPPROLL <- function(yy, N) NA^(RcppRoll::roll_sum(yy == 0, N) != N)
BaseFilter <- function(yy, N) NA^(stats::filter(yy == 0, rep(1, N), sides = 1)[-(1:N-1)] != N)
DTShift <- function(yy, N) NA^(Reduce(`+`, data.table::shift(yy == 0, 0:(N-1)))[-(1:N-1)] != N)
set.seed(123)
yy <- sample(0:1, 1e4, replace = TRUE)
N <- 3
library(microbenchmark)
microbenchmark(
"for loop" = ForLoop(yy, N),
"Compiled for loop" = ForLoopBin(yy, N),
"zoo1" = ZOO(yy, N),
"zoo2" = ZOO2(yy, N),
"zoo3" = ZOO3(yy, N),
"Rcpproll" = RCPPROLL(yy, N),
"stats::filter" = BaseFilter(yy, N),
"data.table::shift" = DTShift(yy, N)
)
# Unit: microseconds
# expr min lq mean median uq max neval cld
# for loop 25917.837 26858.936 30157.3927 28546.2595 29334.2430 110135.205 100 d
# Compiled for loop 7559.837 8208.142 9709.7256 8882.6875 9428.9155 22683.347 100 c
# zoo1 101699.548 107857.014 112210.5929 110402.3985 113335.7745 171537.068 100 f
# zoo2 72265.949 77788.709 81275.9028 80292.8135 81917.8985 153197.948 100 e
# zoo3 4584.861 4734.778 4939.3528 4785.9770 4853.6560 13228.514 100 b
# Rcpproll 216.636 246.076 290.7211 290.0745 311.3540 663.667 100 a
# stats::filter 425.912 475.350 536.0757 509.5900 544.6295 1497.568 100 a
# data.table::shift 334.394 365.593 443.2138 409.4325 424.6320 1944.279 100 a