R 中高效的双重 for 循环:最大程度地提高运行效率

时间:2018-12-09 15:56:08

标签: r performance for-loop vectorization

在 R 中,以下双重 for 循环的高效实现是什么?

# Reproducible example data: four normal samples of length 100 with
# means 1, 2, 3 and 4.
set.seed(1)
u <- rnorm(100, 1)
v <- rnorm(100, 2)
x <- rnorm(100, 3)
y <- rnorm(100, 4)

# Accumulate (1 - max(u[i], v[j])) * (1 - max(x[i], y[j])) over every (i, j).
# Loop bounds follow the vector lengths instead of a hard-coded 100.
# The accumulator keeps the name `sum`; a call such as sum(...) still reaches
# base::sum because R skips non-function bindings when looking up a function.
sum <- 0
for (i in seq_along(u)) {
  for (j in seq_along(v)) {
    sum <- sum + (1 - max(u[i], v[j])) * (1 - max(x[i], y[j]))
  }
}

特别是对于很长的向量,评估需要花费相当长的时间,但是我想知道是否有一种方法可以向量化这个双重for循环?非常感谢。

3 个答案:

答案 0 :(得分:3)

与 @www 给出的答案类似(但只使用 base R)

# Build every (u[i], v[j]) and every (x[i], y[j]) pairing as a two-column
# data frame, one row per pairing.
uv <- expand.grid(u, v)
xy <- expand.grid(x, y)

# do.call(pmax, ...) passes the two columns to pmax, giving the maximum of each
# pairing; the two results are multiplied element-wise and summed.
sum((1 - do.call(pmax, uv))*(1 - do.call(pmax, xy)))

# [1] 37270.31

基准

library(microbenchmark)

# Time the double loop against three vectorized alternatives on the same
# u, v, x, y vectors. The tidyverse entry needs tibble(), complete(),
# nesting() and %>% to be attached (e.g. via library(tidyverse)).
microbenchmark(
  original = {
    SUM <- 0
    for (i in seq_along(u)) {
      for (j in seq_along(v)) {
        # Both factors use the (i, j) pairing: u[i] with v[j], x[i] with y[j],
        # so this entry computes the same quantity as the other three.
        SUM <- SUM + (1 - max(u[i], v[j])) * (1 - max(x[i], y[j]))
      }
    }
  },
  tidyverse = {
    # tibble() replaces the deprecated data_frame().
    dat <- tibble(u, v, x, y)
    dat2 <- dat %>% complete(nesting(u, x), nesting(v, y))

    sum(with(dat2, (1 - pmax(u, v)) * (1 - pmax(x, y))))
  },
  expand = {
    uv <- expand.grid(u, v)
    xy <- expand.grid(x, y)

    sum((1 - do.call(pmax, uv)) * (1 - do.call(pmax, xy)))
  },
  outer = sum((1 - outer(u, v, pmax)) * (1 - outer(x, y, pmax)))
)

# Unit: microseconds
#       expr       min         lq       mean     median        uq        max neval
#   original 12512.838 14315.3480 18210.6801 15189.9525 17504.480 217572.149   100
#  tidyverse  4373.285  4924.0305  5812.2483  5603.1585  6044.828  14461.375   100
#     expand   843.972   961.2120  1163.5428  1061.9080  1219.674   2865.911   100
#      outer   228.823   252.7905   301.5965   285.5315   322.832    686.055   100

答案 1 :(得分:3)

我的方法更快:它用 outer 取代了显式循环,由 outer 一次性对所有 (i, j) 组合进行计算。

首先是不需要外部程序包的函数:OP 的原始代码、user20650 评论(user20650's comment)中的版本,以及我的版本。

# Baseline implementation: explicit double loop over every (i, j) pairing,
# adding (1 - max(u[i], v[j])) * (1 - max(x[i], y[j])) to a running total.
# Returns the total as a single number (0 when u or v is empty).
original <- function(u, v, x, y) {
  total <- 0
  for (idx_u in seq_along(u)) {
    for (idx_v in seq_along(v)) {
      max_uv <- max(u[idx_u], v[idx_v])
      max_xy <- max(x[idx_u], y[idx_v])
      total <- total + (1 - max_uv) * (1 - max_xy)
    }
  }
  total
}

# Single-loop variant: for each element of u, the inner loop is replaced by
# pmax() against the whole of v (and x[i] against the whole of y). The
# per-j contributions are accumulated as a vector and summed at the end.
comment <- function(u, v, x, y) {
  per_j_total <- 0
  for (k in seq_along(u)) {
    one_minus_uv <- 1 - pmax(u[k], v)
    one_minus_xy <- 1 - pmax(x[k], y)
    per_j_total <- per_j_total + one_minus_uv * one_minus_xy
  }
  sum(per_j_total)
}

# Fully vectorized variant: outer(..., pmax) builds the matrix of pairwise
# maxima for (u, v) and for (x, y); the element-wise product of the two
# "one minus" matrices is then summed.
rui <- function(u, v, x, y) {
  sum((1 - outer(u, v, pmax)) * (1 - outer(x, y, pmax)))
}

接下来是 www 的答案(www's answer)和 IceCreamToucan 的答案(IceCreamToucan's answer)中所用的函数。

library(tidyverse)

# tidyverse variant: put the four vectors in one tibble, expand it to every
# combination of a (u, x) row with a (v, y) row, then sum
# (1 - pmax(u, v)) * (1 - pmax(x, y)) over the expanded rows.
# The vectors become columns of one tibble, so they are assumed to share a
# single length.
www <- function(u, v, x, y) {
  # tibble() replaces the deprecated data_frame().
  dat <- tibble(u, v, x, y)
  dat2 <- dat %>% complete(nesting(u, x), nesting(v, y))
  sum(with(dat2, (1 - pmax(u, v)) * (1 - pmax(x, y))))
}

# expand.grid variant: materialize every (u, v) and every (x, y) pairing as
# rows of a data frame, take the maximum of each pairing with pmax(), and sum
# the element-wise product of the two "one minus" vectors.
IceCream <- function(u, v, x, y) {
  uv_pairs <- expand.grid(u, v)
  xy_pairs <- expand.grid(x, y)
  one_minus_uv <- 1 - pmax(uv_pairs[[1]], uv_pairs[[2]])
  one_minus_xy <- 1 - pmax(xy_pairs[[1]], xy_pairs[[2]])
  sum(one_minus_uv * one_minus_xy)
}

对它们全部进行测试,以查看结果是否相同。请注意,存在浮点问题。

# Fix the RNG seed so the four test vectors are reproducible.
set.seed(1234)

# Four normal samples of length 100 with means 1, 2, 3 and 4.
u <- rnorm(1e2, 1)
v <- rnorm(1e2, 2)
x <- rnorm(1e2, 3)
y <- rnorm(1e2, 4)

# Run every implementation on the same inputs.
# NOTE(review): the result names `c` and `i` shadow base::c and a common loop
# index as variables; function calls to c() still resolve to base::c.
o <- original(u, v, x, y)
c <- comment(u, v, x, y)
w <- www(u, v, x, y)
i <- IceCream(u, v, x, y)
r <- rui(u, v, x, y)

# all.equal() compares within a numeric tolerance rather than bit-for-bit,
# so it reports whether each result matches the loop baseline `o`.
all.equal(o, c)
all.equal(o, w)
all.equal(o, i)
all.equal(o, r)

# Raw differences between results, printed to show the size of any
# floating-point discrepancy.
o - c
o - w
o - r
w - r
i - r
c - r

现在进行速度测试。

library(microbenchmark)
library(ggplot2)

# Time each implementation on the same u, v, x, y inputs; the labels on the
# left are the names shown in the benchmark output.
mb <- microbenchmark(
  loop = original(u, v, x, y),
  pmax = comment(u, v, x, y),
  tidy = www(u, v, x, y),
  ice = IceCream(u, v, x, y),
  outer = rui(u, v, x, y)
)

# Plot the benchmark result.
autoplot(mb)

enter image description here

答案 2 :(得分:1)

这是您代码的输出。

import React, {Component} from "react";
import { Button, Progress } from 'reactstrap';
import "../src/Questions.css"

class Questions extends React.Component {



     handleClick=()=>{
         alert(this.state.value);
         this.setState({
             value:this.state.value +20
         })


     }

    render() {

        this.state = {

            value:10
}

        return(
            <div>
                <div><Progress value={this.state.value} /></div>
                <div className="howMuchText">How much does it cost to build an app</div>


                <div className="nextButton">
                <Button onClick={this.handleClick} color="primary" size="lg">Next</Button>
                </div>
            </div>
        )
    }
}

export default Questions;

set.seed(1)
u <- rnorm(100, 1)
v <- rnorm(100, 2)
x <- rnorm(100, 3)
y <- rnorm(100, 4)
SUM <- 0
for (i in 1:100){
  for (j in 1:100) {
    SUM <- SUM + (1 - max(u[i], v[j])) * (1 - max(x[i], y[j]))
  }
}
SUM
# [1] 37270.31

使用 tidyverse 可以生成相同的输出。我们首先需要为每个向量创建正确的组合。然后,我们可以使用 pmax 来计算结果。

library(tidyverse)
dat <- data_frame(u, v, x, y)
dat2 <- dat %>% complete(nesting(u, x), nesting(v, y))
SUM2 <- sum(with(dat2, (1 - pmax(u, v)) * (1 - pmax(x, y))))
SUM2
# [1] 37270.31

tidyverse 方法比 for 循环(for-loop)快。