Go 并发:Chudnovsky 算法的并发版本比同步版本慢

时间:2016-03-19 16:14:09

标签: go concurrency parallel-processing

我在朋友的推荐下刚刚开始学习 Go。到目前为止,我很喜欢它,但我写了一个(我以为会是)展示轻量级并发强大功能的完美例子,却得到了令人惊讶的结果……所以我怀疑是我做错了什么,或者是我误解了 goroutine 的开销。我希望这里的 Gopher 们可以提供一些见解。

我在 Go 中分别用 goroutine 和简单的同步执行实现了 Chudnovsky 算法。我原以为,既然每一项的计算都独立于其他项,并发运行至少会快一些。

注:我是在第 5 代 i7 上运行的,所以如果 goroutine 确实如我所听说的那样被多路复用到多个线程上,那么这应该既是并发的,也是并行的。

 package main

import (
  "fmt"
  "math"
  "strconv"
  "time"
)

// main asks for an iteration count, then evaluates that many terms of the
// series twice: once with one goroutine per term, and once in a plain loop.
// After each pass it prints the elapsed time followed by the resulting
// estimate 1/(12*sum).
func main() {
	var input string

	fmt.Print("How many iterations? ")
	fmt.Scanln(&input)

	max, err := strconv.Atoi(input)
	if err != nil {
		panic("You did not enter a valid integer")
	}

	// Unbuffered: each goroutine blocks on its send until main receives it.
	results := make(chan float64)

	// Concurrent pass: fan out one goroutine per term, then collect exactly
	// as many values as were launched.
	began := time.Now()
	for term := 0; term < max; term++ {
		go chudnovskyConcurrent(term, results)
	}
	total := 0.0
	for received := 0; received < max; received++ {
		total += <-results
	}
	fmt.Println("Duration of concurrent calculation: ", time.Since(began))
	fmt.Println(1 / (12 * total))

	// Synchronous pass: the same terms, accumulated on the calling goroutine.
	began = time.Now()
	total = 0
	for term := 0; term < max; term++ {
		total += chudnovskySync(term)
	}
	fmt.Println("Duration of synchronous calculation: ", time.Since(began))
	fmt.Println(1 / (12 * total))
}

// chudnovskyConcurrent computes the i-th term of the Chudnovsky series,
//
//	(-1)^i * (6i)! * (545140134*i + 13591409) / ((3i)! * (i!)^3 * 640320^(3i+3/2)),
//
// and sends it on c. It performs exactly one send, so the caller must receive
// once per call (or give c enough buffer), otherwise the goroutine blocks.
//
// The factorial quotient is built up as a running float64 ratio rather than
// from integer factorials: a uint64 factorial wraps past 20!, which makes
// every term from i = 4 onwards wrong and turns large i into NaN (0/0 or
// x/Inf). With the recurrence, terms too small for float64 become 0.
func chudnovskyConcurrent(i int, c chan<- float64) {
	// 640320^3 = 2^18 * 10005^3, which float64 represents exactly.
	const c3 = 640320.0 * 640320.0 * 640320.0

	// ratio tracks (-1)^k * (6k)! / ((3k)! * (k!)^3 * 640320^(3k)).
	// Each step shrinks it by roughly 1e-14, so it underflows to zero after
	// a few dozen steps; stop there because later steps cannot change it.
	ratio := 1.0
	for k := 1; k <= i && ratio != 0; k++ {
		kf := float64(k)
		num := (6*kf - 5) * (6*kf - 4) * (6*kf - 3) * (6*kf - 2) * (6*kf - 1) * (6 * kf)
		den := (3*kf - 2) * (3*kf - 1) * (3 * kf) * kf * kf * kf
		ratio *= -(num / den) / c3
	}

	// The remaining factor 640320^(3/2) is sqrt(640320^3).
	c <- ratio * (545140134*float64(i) + 13591409) / math.Sqrt(c3)
}

func chudnovskySync(i int) (r float64) {
  var numerator float64
  var denominator float64
  ifloat := float64(i)
  iun := uint64(i)
  numerator = math.Pow(-1, ifloat) * float64(factorial(6*iun)) * (545140134*ifloat+13591409)
  denominator = float64(factorial(3*iun)) * math.Pow(float64(factorial(iun)),3) * math.Pow(math.Pow(640320,3),ifloat+0.5)
  r = numerator/denominator
  return
}

// factorial returns n! as a uint64, with factorial(0) == 1.
//
// The product is taken in uint64 arithmetic, so it is only exact for n <= 20;
// for larger n it silently wraps modulo 2^64.
func factorial(n uint64) (res uint64) {
	res = 1
	// Multiply downwards from n; the factor 1 is skipped since it is a no-op.
	for ; n > 1; n-- {
		res *= n
	}
	return res
}

以下是我的结果:

How many iterations? 20
Duration of concurrent calculation:  573.944µs
3.1415926535897936
Duration of synchronous calculation:  63.056µs
3.1415926535897936

2 个答案:

答案 0 :(得分:2)

您所做的每一项计算都太简单了,不值得为它单独启动一个 goroutine。与实际计算相比,您在运行时(创建 goroutine、多路复用、调度等)上浪费的时间更多。您想做的事情更适合 GPU:GPU 拥有大量并行执行单元,可以在瞬间完成这类简单计算。但那需要使用其他语言和 API。

您可以做的是为每个硬件线程创建一个软件执行线程(goroutine):把 max 次迭代拆分成若干大块,并行执行各块。下面的代码非常简化,只是为了说明这个思路:

package main

import (
  "fmt"
  "math"
  "strconv"
  "time"
  "runtime"
)

// main asks for an iteration count, then evaluates that many terms of the
// series twice: once split into contiguous chunks handled by one goroutine
// per GOMAXPROCS slot, and once in a plain loop. After each pass it prints
// the elapsed time followed by the resulting estimate 1/(12*sum).
func main() {
	var input string
	var sum float64
	var pi float64

	// Query the parallelism limit once; an argument below 1 leaves the
	// setting unchanged and just returns it.
	workers := runtime.GOMAXPROCS(0)
	// Buffered so every worker can deliver its partial sum without waiting.
	c := make(chan float64, workers)

	fmt.Print("How many iterations? ")
	fmt.Scanln(&input)
	max, err := strconv.Atoi(input)

	if err != nil {
		panic("You did not enter a valid integer")
	}
	start := time.Now() //start timing execution of concurrent routine

	for w := 0; w < workers; w++ {
		go func(w int) {
			// Worker w owns the half-open range [w*max/workers, (w+1)*max/workers).
			// Consecutive ranges share their boundary, so together they cover
			// 0..max-1 exactly once even when max is not a multiple of workers
			// (or is smaller than workers, in which case some ranges are empty).
			lo := w * max / workers
			hi := (w + 1) * max / workers

			var partial float64
			for j := lo; j < hi; j++ {
				partial += chudnovskySync(j)
			}
			c <- partial
		}(w)
	}

	// Exactly one partial sum arrives per worker.
	for w := 0; w < workers; w++ {
		sum += <-c
	}
	end := time.Now() //end of concurrent routine
	fmt.Println("Duration of concurrent calculation: ", end.Sub(start))
	pi = 1 / (12 * sum)
	fmt.Println(pi)

	start = time.Now() //start timing execution of synchronous routine
	sum = 0
	for i := 0; i < max; i++ {
		sum += chudnovskySync(i)
	}
	end = time.Now() //end of synchronous routine
	fmt.Println("Duration of synchronous calculation: ", end.Sub(start))
	pi = 1 / (12 * sum)
	fmt.Println(pi)
}

// chudnovskySync returns the i-th term of the Chudnovsky series,
//
//	(-1)^i * (6i)! * (545140134*i + 13591409) / ((3i)! * (i!)^3 * 640320^(3i+3/2)).
//
// Summing the terms for i = 0..n-1 and taking 1/(12*sum) approximates pi.
//
// The factorial quotient is accumulated as a float64 ratio, one index at a
// time, instead of dividing integer factorials. A uint64 factorial wraps past
// 20!, so terms from i = 4 onwards would be wrong and large i would evaluate
// to NaN (0/0 or x/Inf). Here, terms too small for float64 simply become 0.
func chudnovskySync(i int) (r float64) {
	// 640320^3 = 2^18 * 10005^3, which float64 represents exactly.
	const cubed = 640320.0 * 640320.0 * 640320.0

	// acc tracks (-1)^k * (6k)! / ((3k)! * (k!)^3 * 640320^(3k)).
	// It shrinks by roughly 1e-14 per step and underflows to zero after a
	// few dozen steps, at which point further steps cannot change it.
	acc := 1.0
	for k := 1; k <= i && acc != 0; k++ {
		kf := float64(k)
		num := (6*kf - 5) * (6*kf - 4) * (6*kf - 3) * (6*kf - 2) * (6*kf - 1) * (6 * kf)
		den := (3*kf - 2) * (3*kf - 1) * (3 * kf) * kf * kf * kf
		acc *= -(num / den) / cubed
	}

	// The remaining factor 640320^(3/2) is sqrt(640320^3).
	r = acc * (545140134*float64(i) + 13591409) / math.Sqrt(cubed)
	return r
}

// factorial returns n! as a uint64, with factorial(0) == 1.
//
// The multiplication is done in uint64, so the result is exact only for
// n <= 20; for larger n it wraps modulo 2^64 without any error.
func factorial(n uint64) (res uint64) {
	product := uint64(1)
	// Count down from n; stopping above 1 skips the no-op multiplication.
	for k := n; k > 1; k-- {
		product *= k
	}
	return product
}

这是结果

$ go version
go version go1.5.2 windows/amd64

$ go run main.go
GOMAXPROCS = 4
How many iterations? 10000
Duration of concurrent calculation:  932.8916ms
NaN
Duration of synchronous calculation:  2.0639744s
NaN 

答案 1 :(得分:0)

我同意,您的计算量不足以抵消多个 goroutine 带来的开销。只是为了好玩,我修改了您的代码,让每一项在返回结果之前重复计算多次(1000、10000、100000、1000000 次)。我在一台四核 Xeon 的 Mac OS X Yosemite 上运行了它(20 次迭代),正如您所料,同步版本的耗时大约是并行版本的四倍。

我注意到一件有趣的事情是,通过大量重复,同步版本实际上需要的时间是并行版本的四倍多。我猜这与英特尔的超线程架构有关,它允许每个核心内部达到某种程度的并行性,但我不确定。