我正在寻找R中for循环的一种更快的替代方法。特别地,由于加法,可以提供向量缩减的中间结果的东西。 purrr::accumulate()
可以解决问题,但速度似乎很慢。以下显示了一个可重现的示例。
accumulate_values <- function(time_vector,
input_vector,
list_of_parameters)
{
number_samples <- length(time_vector)
time_steps <- c(0, diff(time_vector))
calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps
accumulated_values <- rep(0, number_samples)
for (i in 2:number_samples) {
accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])
}
return(accumulated_values)
}
library(tidyverse)
accumulate_values_purrr <- function(time_vector,
input_vector,
list_of_parameters)
{
number_samples <- length(time_vector)
time_steps <- c(0, diff(time_vector))
calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps
# accumulated_values <- rep(0, number_samples)
# for (i in 2:number_samples) {
# accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])
#
# }
accumulated_values <- calculation %>% purrr::accumulate(function(x, y) max(0, x + y))
return(accumulated_values)
}
# Data
Nums <- 1000000
my_time_vector <- seq(1, Nums, by = 1)
my_input_vector <- rnorm(Nums)
my_list_of_parameters <- list(K = 5, M = 0.01)
# Results
set.seed(1987)
library(tictoc)
# With for-loop
tic()
answer1 <- accumulate_values(my_time_vector,
my_input_vector,
my_list_of_parameters)
toc()
## 1.73 sec elapsed
# With purrr::accumulate
tic()
answer2 <- accumulate_values_purrr(my_time_vector,
my_input_vector,
my_list_of_parameters)
toc()
## 5.93 sec elapsed
identical(answer1, answer2)
## [1] TRUE
如何使accumulate()
更快?有更快的替代方法吗?
答案 0 :(得分:0)
这是尝试。我使用{bench}
-package提供了更准确的微基准测试。
library(purrr)
accumulate_values <- function(time_vector,
input_vector,
list_of_parameters)
{
number_samples <- length(time_vector)
time_steps <- c(0, diff(time_vector))
calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps
accumulated_values <- rep(0, number_samples)
for (i in 2:number_samples) {
accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])
}
return(accumulated_values)
}
accumulate_values_purrr <- function(time_vector,
input_vector,
list_of_parameters)
{
number_samples <- length(time_vector)
time_steps <- c(0, diff(time_vector))
calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps
# accumulated_values <- rep(0, number_samples)
# for (i in 2:number_samples) {
# accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])
#
# }
accumulated_values <- calculation %>% purrr::accumulate(function(x, y) max(0, x + y))
return(accumulated_values)
}
# Data
set.seed(1987)
Nums <- 1000000
# Nums <- 1000
time_vector <- seq(1, Nums, by = 1)
input_vector <- rnorm(Nums)
list_of_parameters <- list(K = 5, M = 0.01)
number_samples <- length(time_vector)
time_steps <- c(0, diff(time_vector))
calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps
answer <- accumulate_values_purrr(time_vector,
input_vector,
list_of_parameters)
Rcpp::cppFunction(
plugins = "cpp11",
"std::vector<double> process(NumericVector calculation) {
// NumericVector result (calculation.length(), 0.0);
std::vector<double> result;
// result.capacity(calculation.length());
std::accumulate(calculation.begin(),
calculation.end(),
0.0,
[&result](double x, double y){
auto new_entry = std::max(0.0, x + y);
result.push_back(new_entry);
return new_entry;
});
return result;
}")
Rcpp::cppFunction(
plugins = "cpp11",
"std::vector<double> process_with_cap(NumericVector calculation) {
std::vector<double> result;
result.reserve(calculation.length());
std::accumulate(calculation.cbegin(),
calculation.cend(),
0.0,
[&result](double x, double y){
auto new_entry = std::max(0.0, x + y);
result.push_back(new_entry);
return new_entry;
});
return result;
}")
bench::mark(for_loopa = accumulate_values(time_vector,
input_vector,
list_of_parameters),
rcpp_process = process(calculation),
rcpp_process_with_cap = process_with_cap(calculation),
purrr_accumulate = accumulate_values_purrr(time_vector,
input_vector,
list_of_parameters)) %>%
bench:::summary.bench_mark(relative = TRUE)
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
#> # A tibble: 4 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 for_loopa 44.0 47.4 8.36 8.01 1.38
#> 2 rcpp_process 1.37 1.38 279. 1 1
#> 3 rcpp_process_with_cap 1 1 375. 1 1.60
#> 4 purrr_accumulate 412. 396. 1 14.0 1.22