在朋友的推荐下,我刚刚开始学习 Go。到目前为止,我很喜欢它,但我写了一个(我以为会是)展示轻量级并发强大威力的完美例子,却得到了令人惊讶的结果……所以我怀疑是我做错了什么,或者是我误解了 goroutine 的开销。我希望这里的一些 Gopher(Go 开发者)可以提供见解。
我分别用 goroutine 和简单的同步执行,在 Go 中实现了 Chudnovsky 算法。我原本以为,由于每一项的计算都独立于其他项,并发运行至少会更快一些。
注:我是在第 5 代 i7 上运行的,所以如果 goroutine 确实像我被告知的那样被多路复用到多个线程上,那么这段程序应该既是并发的,又是并行的。
package main
import (
"fmt"
"math"
"strconv"
"time"
)
// main prompts for an iteration count and then sums that many terms of the
// Chudnovsky series twice: first with one goroutine per term, then in a plain
// sequential loop. After each pass it prints the elapsed time followed by the
// resulting estimate of pi, computed as 1 / (12 * sum).
//
// It panics if the text read from standard input is not a valid integer.
func main() {
	fmt.Print("How many iterations? ")
	var line string
	fmt.Scanln(&line)
	terms, convErr := strconv.Atoi(line)
	if convErr != nil {
		panic("You did not enter a valid integer")
	}

	// Unbuffered: every goroutine blocks on its send until main receives it.
	results := make(chan float64)

	// Concurrent pass: launch one goroutine per term, then collect exactly
	// as many values as were launched.
	began := time.Now()
	for k := 0; k < terms; k++ {
		go chudnovskyConcurrent(k, results)
	}
	var total float64
	for received := 0; received < terms; received++ {
		total += <-results
	}
	elapsed := time.Since(began)
	fmt.Println("Duration of concurrent calculation: ", elapsed)
	fmt.Println(1 / (12 * total))

	// Synchronous pass: the same terms, evaluated one after another.
	began = time.Now()
	total = 0
	for k := 0; k < terms; k++ {
		total += chudnovskySync(k)
	}
	elapsed = time.Since(began)
	fmt.Println("Duration of synchronous calculation: ", elapsed)
	fmt.Println(1 / (12 * total))
}
// chudnovskyConcurrent computes the i-th term (i >= 0) of the Chudnovsky series
//
//	(-1)^i * (6i)! * (545140134*i + 13591409) / ((3i)! * (i!)^3 * 640320^(3i+3/2))
//
// and sends it on c. The sum S of all terms satisfies pi = 1 / (12 * S).
//
// The factorial quotient is accumulated as a running float64 product rather
// than from integer factorials: 21! already exceeds a uint64, so integer
// factorials silently wrap from i = 4 onwards and eventually turn the term
// into NaN. The running product instead shrinks towards zero, so large i
// yields a term of 0.
func chudnovskyConcurrent(i int, c chan<- float64) {
	// 640320^3 = 2^18 * 10005^3, which a float64 represents exactly.
	base := math.Pow(640320, 3)

	// ratio = (6i)! / ((3i)! * (i!)^3 * 640320^(3i)). Going from j-1 to j
	// multiplies the factorial quotient by
	//   (6j)!/(6j-6)! / ((3j)!/(3j-3)! * j^3) = 24*(6j-5)*(2j-1)*(6j-1) / j^3.
	ratio := 1.0
	for j := 1; j <= i; j++ {
		jf := float64(j)
		ratio *= 24 * (6*jf - 5) * (2*jf - 1) * (6*jf - 1) / (jf * jf * jf * base)
		if ratio == 0 {
			// Underflowed; every further factor is far below 1, so it stays 0.
			break
		}
	}

	// Remaining factor 640320^(3/2) = sqrt(640320^3).
	term := ratio * (545140134*float64(i) + 13591409) / math.Sqrt(base)
	if i%2 != 0 {
		term = -term
	}
	c <- term
}
// chudnovskySync returns the i-th term (i >= 0) of the Chudnovsky series
//
//	(-1)^i * (6i)! * (545140134*i + 13591409) / ((3i)! * (i!)^3 * 640320^(3i+3/2))
//
// The sum S of all terms satisfies pi = 1 / (12 * S).
//
// The factorial quotient is accumulated as a running float64 product rather
// than from integer factorials: 21! already exceeds a uint64, so integer
// factorials silently wrap from i = 4 onwards and eventually turn the term
// into NaN. The running product instead shrinks towards zero, so large i
// yields a term of 0.
func chudnovskySync(i int) (r float64) {
	// 640320^3 = 2^18 * 10005^3, which a float64 represents exactly.
	base := math.Pow(640320, 3)

	// ratio = (6i)! / ((3i)! * (i!)^3 * 640320^(3i)). Going from j-1 to j
	// multiplies the factorial quotient by
	//   (6j)!/(6j-6)! / ((3j)!/(3j-3)! * j^3) = 24*(6j-5)*(2j-1)*(6j-1) / j^3.
	ratio := 1.0
	for j := 1; j <= i; j++ {
		jf := float64(j)
		ratio *= 24 * (6*jf - 5) * (2*jf - 1) * (6*jf - 1) / (jf * jf * jf * base)
		if ratio == 0 {
			// Underflowed; every further factor is far below 1, so it stays 0.
			break
		}
	}

	// Remaining factor 640320^(3/2) = sqrt(640320^3).
	r = ratio * (545140134*float64(i) + 13591409) / math.Sqrt(base)
	if i%2 != 0 {
		r = -r
	}
	return r
}
// factorial returns n! evaluated in uint64 arithmetic; factorial(0) is 1.
//
// 20! is the largest factorial that fits in a uint64, so for n > 20 the
// result is the true value reduced modulo 2^64.
func factorial(n uint64) (res uint64) {
	res = 1
	// Multiply downwards from n; the factors 1 and 0 are never applied.
	for ; n > 1; n-- {
		res *= n
	}
	return res
}
以下是我的结果:
How many iterations? 20
Duration of concurrent calculation: 573.944µs
3.1415926535897936
Duration of synchronous calculation: 63.056µs
3.1415926535897936
答案 0 :(得分:2)
您所做的每一项计算都太简单了,不值得各自放到单独的 goroutine 中执行。与实际计算相比,您在运行时(创建 goroutine、多路复用、调度等)上浪费的时间更多。您想做的事情更适合 GPU:它拥有大量并行执行单元,可以在瞬间完成这些简单的计算。但要做到这一点,您需要使用其他语言和 API。
您可以做的是为每个硬件线程创建一个软件执行线程:把 max 次迭代拆分成若干大块,并行地执行这些块。下面的代码非常简单,只是为了说明这个思路:
package main
import (
"fmt"
"math"
"strconv"
"time"
"runtime"
)
// main prompts for an iteration count and then sums that many terms of the
// Chudnovsky series twice: first split across one goroutine per available
// processor (runtime.GOMAXPROCS), then in a plain sequential loop. After each
// pass it prints the elapsed time followed by the resulting estimate of pi,
// computed as 1 / (12 * sum).
//
// It panics if the text read from standard input is not a valid integer.
func main() {
	var input string
	fmt.Print("How many iterations? ")
	// Scanln's error is not checked; an empty read leaves input empty,
	// which Atoi rejects below.
	fmt.Scanln(&input)
	iterations, err := strconv.Atoi(input)
	if err != nil {
		panic("You did not enter a valid integer")
	}

	// A negative argument only queries the current setting.
	workers := runtime.GOMAXPROCS(-1)
	// One buffer slot per worker, so no worker blocks on its send.
	c := make(chan float64, workers)

	start := time.Now() // start timing execution of concurrent routine
	for w := 0; w < workers; w++ {
		// Worker w handles the half-open range [lo, hi). Consecutive ranges
		// share their boundary, so together they cover [0, iterations)
		// exactly once even when iterations is not a multiple of workers.
		lo := w * iterations / workers
		hi := (w + 1) * iterations / workers
		go func(lo, hi int) {
			var partial float64
			for k := lo; k < hi; k++ {
				partial += chudnovskySync(k)
			}
			c <- partial
		}(lo, hi)
	}
	var sum float64
	for w := 0; w < workers; w++ {
		sum += <-c
	}
	end := time.Now() // end of concurrent routine
	fmt.Println("Duration of concurrent calculation: ", end.Sub(start))
	pi := 1 / (12 * sum)
	fmt.Println(pi)

	start = time.Now() // start timing execution of synchronous routine
	sum = 0
	for i := 0; i < iterations; i++ {
		sum += chudnovskySync(i)
	}
	end = time.Now() // end of synchronous routine
	fmt.Println("Duration of synchronous calculation: ", end.Sub(start))
	pi = 1 / (12 * sum)
	fmt.Println(pi)
}
// chudnovskySync returns the i-th term (i >= 0) of the Chudnovsky series
//
//	(-1)^i * (6i)! * (545140134*i + 13591409) / ((3i)! * (i!)^3 * 640320^(3i+3/2))
//
// The sum S of all terms satisfies pi = 1 / (12 * S).
//
// Integer factorials are deliberately avoided: 21! no longer fits in a
// uint64, so factorial-based evaluation silently wraps from i = 4 onwards and
// eventually produces NaN. The factorial quotient is instead built up as a
// float64 running product that decays towards zero, so large i yields 0.
func chudnovskySync(i int) (r float64) {
	// 640320^3 = 2^18 * 10005^3, which a float64 represents exactly.
	cube := math.Pow(640320, 3)

	// quotient = (6i)! / ((3i)! * (i!)^3 * 640320^(3i)). Each step from j-1
	// to j contributes
	//   (6j)!/(6j-6)! / ((3j)!/(3j-3)! * j^3) = 24*(6j-5)*(2j-1)*(6j-1) / j^3.
	quotient := 1.0
	for j := 1; j <= i && quotient != 0; j++ {
		x := float64(j)
		quotient *= 24 * (6*x - 5) * (2*x - 1) * (6*x - 1) / (x * x * x * cube)
	}

	// The leftover 640320^(3/2) in the denominator equals sqrt(640320^3).
	r = quotient * (545140134*float64(i) + 13591409) / math.Sqrt(cube)
	if i%2 != 0 {
		r = -r
	}
	return r
}
// factorial returns the product 1 * 2 * ... * n as a uint64, with
// factorial(0) == 1.
//
// The multiplication is ordinary uint64 arithmetic, so for n > 20 (where n!
// no longer fits) the returned value is n! modulo 2^64.
func factorial(n uint64) (res uint64) {
	res = 1
	// k trails the factor by one so the counter itself can never wrap.
	for k := uint64(1); k < n; k++ {
		res *= k + 1
	}
	return res
}
这是结果
$ go version
go version go1.5.2 windows/amd64
$ go run main.go
GOMAXPROCS = 4
How many iterations? 10000
Duration of concurrent calculation: 932.8916ms
NaN
Duration of synchronous calculation: 2.0639744s
NaN
答案 1 :(得分:0)
我同意,您的计算量不足以抵消多个 goroutine 带来的开销。只是为了好玩,我修改了您的代码,让每一项在返回结果之前重复计算多次(1000、10000、100000、1000000 次)。我在一台运行 Mac OS X Yosemite 的四核 Xeon 机器上运行了它(20 次迭代),正如您所料,同步版本的耗时大约是并行版本的四倍。
我注意到一件有趣的事情是,通过大量重复,同步版本实际上需要的时间是并行版本的四倍多。我猜这与英特尔的超线程架构有关,它允许每个核心内部达到某种程度的并行性,但我不确定。