R - 我的条件和嵌套for循环花费的时间太长。如何矢量化?

时间:2013-05-13 01:15:05

标签: r

我有一段 for 循环代码,其中包含一个 if 条件语句:它在最近 10 个循环周期中查找逻辑值的模式变化,条件为真时将计数器加一:

# Count rising edges of `start`: positions where `start` is TRUE while the
# 11 samples immediately before it are all FALSE (their mean is 0).
# R indexes vectors with `[ ]` (`start(ii)` would call the function
# stats::start), and `:` binds tighter than binary `-`, so the window bounds
# must be parenthesised: (ii - 11):(ii - 1).
for (ii in 100:sp) {
  if (start[ii] == 1 && mean(start[(ii - 11):(ii - 1)]) == 0) {
    count <- count + 1
  }
}

sp 的值为 3,560,400,也就是说循环要执行 300 多万次。加载时间很长,执行这个循环大约需要 40 分钟。

如何使用矢量化来优化此代码?

感谢您的帮助。

修改

我已将所有代码都包含在此处,以便每个人都可以看到发生了什么:

# Script setup: remember the working directory, switch to the data directory,
# load packages, redirect console output to a file and read the recording.

# store the current directory
# NOTE(review): initial.dir is saved here, but no setwd(initial.dir) that
# restores it is visible in this excerpt -- confirm it happens later.
initial.dir<-getwd()
# change to the new directory
setwd("H:/R/range")
# load the necessary libraries - sound
# (loadSample, rate, bits and sound below presumably come from this package)
library(sound)
# TTR is loaded but not called in the visible part of this script
library(TTR)

#####################################################
# LOAD .WAV FILE / SET OUTPUT FILE
#####################################################

# set the output file
# From here on console output is diverted to rangetmp3.out.
# NOTE(review): no matching sink() call that ends the diversion is visible.
sink("rangetmp3.out")

# load the dataset
sndSample <- loadSample('range.wav')
# presumably the sampling rate of the recording -- TODO confirm units (Hz)
FS <- rate(sndSample)
# presumably the bit depth of the recording
nBits <- bits(sndSample)

# assign sound wave
# Y is later indexed as Y[1,] and Y[2,], i.e. it is used as a matrix with
# at least two rows.
Y <- sound(sndSample)



#####################################################
# CONSTANTS
#####################################################

#(m/s) speed of light
# (this variable shares its name with base::c; calls such as c(1, 2) still
# resolve to the function, because R skips non-function objects there)
c <- 3E8

############################
# |- Radar parameters
############################

#(s) pulse time
Tp <- 20E-3

# number of samples per pulse (pulse time multiplied by FS)
N <- Tp * FS

#(Hz) LFM start frequency for example
fstart <- 2260E6

#(Hz) LFM stop frequency for example
fstop <- 2590E6

# The two "example" values above are immediately overridden by the ISM-band
# values below; only the ISM-band values are used from here on.

#(Hz) LFM start frequency for ISM band
fstart <- 2402E6

#(Hz) LFM stop frequency for ISM band
fstop <- 2495E6

#(Hz) transmit bandwidth
BW <- fstop - fstart

#instantaneous transmit frequency
# `length.out` is spelled out in full rather than relying on partial
# matching of `length`.
f <- seq(fstart, fstop, length.out = N / 2)

#range resolution
rr <- c / (2 * BW)
max_range <- rr * N / 2

#####################################################
# RANGE
#####################################################

############################
# |- The input appears to be inverted
############################

# Flip the sign of both rows of Y (the input appears to be inverted):
# row 1 becomes `trig` (the sync pulse), row 2 becomes `s`.
trig <- -1 * Y[1, ]
s <- -1 * Y[2, ]

#reset Y
#Y = 0

############################
# |- Parse the data here by triggering off rising edge of sync pulse
############################

# reset counter / threshold
count <- 0
thresh <- 0

# logical vector: TRUE wherever the trigger exceeds the threshold
start <- (trig > thresh)

# last index visited by the loop: N samples short of the end of `start`
sp <- NROW(start) - N

# Count rising edges: `start` is TRUE at ii while the 11 samples before it
# are all FALSE (their mean is 0).
# R indexes vectors with `[ ]` (`start(ii)` would call the function
# stats::start), and `:` binds tighter than binary `-`, so the window bounds
# must be parenthesised: (ii - 11):(ii - 1).
for (ii in 100:sp) {
  if (start[ii] == 1 && mean(start[(ii - 11):(ii - 1)]) == 0) {
    count <- count + 1
  }
}

编辑#2:Matlab循环

% Scan start from index 100 up to N rows short of its end and record every
% rising edge found.
for ii = 100:(size(start,1)-N) 
    % Rising edge: start is 1 at ii and the 11 preceding samples
    % (ii-11 through ii-1) average to 0.
    if start(ii) == 1 && mean(start(ii-11:ii-1)) == 0 
        count = count + 1; 
        % Store the N samples of s beginning at ii as row `count` of sif.
        sif(count,:) = s(ii:ii+N-1); 
        % Index of the edge divided by FS (presumably the sample rate,
        % which would make this the edge time in seconds -- confirm).
        time(count) = ii*1/FS; 
    end
end

1 个答案:

答案 0 :(得分:1)

你可以用一两行代码完成,如下所示:

library(TTR)
# SMA(start, n = 11)[i] is the mean of start[(i - 10):i]. The loop tests the
# 11 values *before* position i, so lag the averages by one position:
# element i of the lagged vector is mean(start[(i - 11):(i - 1)]).
# (A window that contains start[i] itself can never average to 0 when
# start[i] is TRUE, so a centred window would always count 0 here.)
# Note: this counts over the whole vector; index both sides with [100:sp]
# to reproduce the loop's exact range.
shiftIndx <- c(NA, head(seq_along(start), -1))
count <- sum(start == 1 & SMA(start, n = 11)[shiftIndx] == 0, na.rm = TRUE)

其中 SMA 来自 TTR 包。

下面是逐段的解释:

## this will give you a logical vector of TRUE/FALSE
##    whose length will be equal to length of start
start == 1

## This part gives you a simple moving average of every 11 numbers:
##    element i is the mean of start[(i - 10):i] (the first 10 are NA)
SMA(start, n=11)

## The loop tests the 11 values *before* position i, so we need to lag the
##   averages by one position. `shiftIndx` pairs position i with the
##   average that ends at position i - 1.
##    (notice that the leading NA index gives NA for position 1, and that a
##     window containing start[i] itself could never average to 0 when
##     start[i] is TRUE.)
shiftIndx <- c(NA, head(seq_along(start), -1))
SMA(start, n=11)[shiftIndx]

## .. checking if it is equal to 0 will give you another TRUE/FALSE vector,
##     also of length equal to the length of start
SMA(start, n=11)[shiftIndx]  == 0

## Then combining the TRUE/FALSE vectors using `&` gives another TRUE/FALSE
##   vector of the same length, but is TRUE only where _both_ of the
##   original vectors are TRUE
(start == 1)   &   (SMA(start, n=11)[shiftIndx] == 0)

## Taking the `sum(.)` of a logical vector is the equivalent of counting
##    how many TRUE values are in the vector.
## Since you are incrementing `count` every time both conditional statements
##    are TRUE, the final value of `count` will be the same as the sum below.
## (The loop only visits positions 100:sp; index the combined vector with
##    [100:sp] before summing if you need exactly that range.)
sum( (start == 1)   &   (SMA(start, n=11)[shiftIndx] == 0), na.rm=TRUE)

And here are some benchmarks (before the edit of adding `shiftIndx`)

set.seed(7)
# Time the vectorised count (without the index shift) on random vectors of
# 1e6, 1e7 and 1e8 elements, printing the timing and the resulting count.
for (N in c(1e6, 1e7, 1e8)) {
  cat("N is: ", N, "\n")
  # normal noise (mean 0, sd 10) rounded to whole numbers
  start <- round(rnorm(N, mean = 0, sd = 10))
  timing <- system.time({
    count <- sum((start == 1) & (SMA(start, n = 11) == 0), na.rm = TRUE)
  })
  print(timing)
  cat("count is ", count, "\n", strrep("~", 14), "\n\n", sep = "")
}

## OUTPUT: 

N is:  1e+06 
   user  system elapsed 
  0.059   0.021   0.081 
count is 485
~~~~~~~~~~~~~~

N is:  1e+07 
   user  system elapsed 
  0.578   0.195   0.768 
count is 4889
~~~~~~~~~~~~~~

N is:  1e+08 
   user  system elapsed 
  7.808   5.716  19.392 
count is 49737
~~~~~~~~~~~~~~