序列化goroutine(并行但保证顺序)

时间:2018-07-03 14:41:52

标签: go parallel-processing

假设我们要并行处理一些计算,但是我们必须保证结果的顺序与计算的顺序相同:

例如,可以通过以下方式完成此操作:

https://play.golang.org/p/jQbo0EVLzvX

package main

import (
    "fmt"
    "time"
)

// main squares the integers 0..99 on a fixed ring of worker goroutines and
// prints the squares in the same order in which the inputs were produced.
//
// Two tokens circulate around the ring of workers:
//   - the "get" token grants the right to read the next value from srcCh;
//   - the "put" token grants the right to write the next result to dstCh.
//
// Because both tokens visit the workers in the same round-robin order, the
// n-th value read is also the n-th value written, while the computation
// between the read and the write runs in parallel.
func main() {
    const numWorkers = 8

    orderPutChans := make([]chan bool, numWorkers)
    orderGetChans := make([]chan bool, numWorkers)
    doneChans := make([]chan bool, numWorkers)

    for i := 0; i < numWorkers; i++ {
        // Both token channels have capacity 1. Only one token of each kind
        // exists, so a hand-off never blocks, and the last hand-off during
        // shutdown may land in the buffer of a worker that has already
        // exited. This removes the need to mark exited workers in the shared
        // slice, which was an unsynchronised write/read pair (a data race).
        orderPutChans[i] = make(chan bool, 1)
        orderGetChans[i] = make(chan bool, 1)
        doneChans[i] = make(chan bool)
    }

    srcCh := make(chan int)
    dstCh := make(chan int)

    for i := 0; i < numWorkers; i++ {
        go func(j int) {
            myGetCh := orderGetChans[j]
            nextGetCh := orderGetChans[(j+1)%numWorkers]
            myPutCh := orderPutChans[j]
            nextPutCh := orderPutChans[(j+1)%numWorkers]

            for {
                // Wait for permission to read from srcCh.
                <-myGetCh

                v, ok := <-srcCh

                // Hand the read permission to the next worker in both cases:
                // either it reads the next value, or it also observes the
                // closed source and shuts down in turn.
                nextGetCh <- true

                if !ok {
                    break
                }

                // Simulated work. The duration is spelled out because a bare
                // integer passed to time.Sleep is interpreted as nanoseconds.
                time.Sleep(1000 * time.Nanosecond)

                v *= v

                // Wait for permission to write, emit the result, then let
                // the next worker emit its result.
                <-myPutCh

                dstCh <- v

                nextPutCh <- true
            }

            doneChans[j] <- true
        }(i)
    }

    // Close dstCh once every worker has reported completion, which ends the
    // printing loop below.
    go func() {
        for i := 0; i < numWorkers; i++ {
            <-doneChans[i]
        }
        close(dstCh)
    }()

    // Inject the two tokens at worker 0.
    orderGetChans[0] <- true
    orderPutChans[0] <- true

    // Producer: feed the inputs, then close srcCh to start the shutdown.
    go func() {
        for i := 0; i < 100; i++ {
            srcCh <- i
        }
        close(srcCh)
    }()

    for vv := range dstCh {
        fmt.Println(vv)
    }
}

可以使用通道来传递对通道的读/写权限,但这样的代码很繁琐,看起来也不够整洁。Go 中有没有更简洁的方法来实现这一目标?

修改: 我并不是在寻求“简单”的替换,例如改用 chan struct{},或者在 doneChans 上使用 close 来代替 doneChans[i] <- true。

Edit2

一种更简单的方法(至少就代码而言)是拥有一个 results 数组,消费者将数据与索引一起发送(索引即对工人数量取模的结果),goroutine 将结果写入 results[j],然后让 WaitGroup 等待所有操作完成(一批一批地处理),最后遍历结果并将其发送到目标通道。(由于伪共享(false sharing)的问题,这样做可能不太好?)

1 个答案:

答案 0 :(得分:1)

如果我正确理解,这就是您使用“管道”样式的代码版本。管道中有许多步骤:

  1. 发送src值
  2. 工作者处理接收到的src值,并将结果发送到各自的结果通道
  3. 将由各个工作者的结果通道组成的切片合并为一个无序的通道
  4. 对来自无序合并通道的值进行排序

这是代码,它使用您在原始问题的编辑中提到的索引对样式。

// idxPair couples a value with the position it occupies in the input
// sequence, so that results computed out of order can be put back in order.
type idxPair struct {
    idx int // position of the value in the original sequence
    val int // the value itself (input before processing, result after)
}

// main wires the pipeline together: a producer sends indexed values into
// srcCh, a pool of workers processes them, merge fans the per-worker result
// channels into one unordered channel, and order places every result at its
// index before the results are printed.
func main() {
    // Number of values pushed through the pipeline. The same constant sizes
    // the ordered result slice, so the two can never drift apart.
    const numValues = 100

    // add a done channel, an ability to stop the world by closing this.
    done := make(chan struct{})
    defer close(done)

    // create srcChan, this will be where the values go into the pipeline
    srcCh := make(chan idxPair)

    // create a slice of result channels, one for each of the go workers
    const numWorkers = 8
    resChans := make([]<-chan idxPair, numWorkers)

    // waitgroup to wait for all the workers to stop
    var wg sync.WaitGroup
    wg.Add(numWorkers)

    // start the workers, passing them each the src channel,
    // collecting the result channels they return
    for i := 0; i < numWorkers; i++ {
        resChans[i] = worker(done, &wg, srcCh)
    }

    // start a single goroutine to send values into the pipeline
    // all values are sent with an index, to be pieced back into order at the end.
    // Indices start at 0 so that every slot of the ordered result is filled
    // by the pipeline instead of relying on the slice's zero value.
    go func() {
        defer close(srcCh)
        for i := 0; i < numValues; i++ {
            srcCh <- idxPair{idx: i, val: i}
        }
    }()

    // merge all the results channels into a single results channel
    // this channel is unordered.
    mergedCh := merge(done, resChans...)

    // order the values coming from the mergedCh according to the idxPair.idx field.
    orderedResults := order(numValues, mergedCh)

    // order only returns once mergedCh has been closed; wait here so that the
    // workers registered on wg have all signalled completion before printing.
    wg.Wait()

    // iterate over each of the ordered results
    for _, v := range orderedResults {
        fmt.Println(v)
    }
}

// order drains res and returns a slice of length n in which each received
// value is stored at the position given by its idx field. Positions for which
// no pair was received keep the zero value.
//
// Pairs whose idx lies outside [0, n) are skipped instead of causing an
// index-out-of-range panic; res is still drained to the end so that senders
// are not left blocked. order returns only after res has been closed.
func order(n int, res <-chan idxPair) []int {
    results := make([]int, n)

    // collect all the values to order them
    for r := range res {
        if r.idx < 0 || r.idx >= n {
            continue
        }
        results[r.idx] = r.val
    }

    return results
}

// worker starts one goroutine that squares the val of every pair received
// from src and forwards the result, with its idx unchanged, on the returned
// channel.
//
// The goroutine runs until src is closed. When it finishes it closes the
// returned channel and then calls wg.Done. If done is closed while a result
// is waiting to be sent, that result may be dropped; the goroutine keeps
// receiving from src until src is closed.
func worker(done <-chan struct{}, wg *sync.WaitGroup, src <-chan idxPair) <-chan idxPair {
    out := make(chan idxPair)

    go func() {
        // Deferred calls run in reverse order: out is closed first, then the
        // WaitGroup is signalled.
        defer wg.Done()
        defer close(out)

        for in := range src {
            squared := idxPair{idx: in.idx, val: in.val * in.val}

            select {
            case out <- squared:
            case <-done:
                // Give up on this result, but keep consuming src.
            }
        }
    }()

    return out
}


// merge fans the channels in cs into a single returned channel. Values are
// forwarded as they arrive, so the relative order between different input
// channels is not defined.
//
// One forwarding goroutine runs per input channel; each stops when its input
// is closed or when done is closed. The returned channel is closed once all
// forwarding goroutines have stopped.
//
// example and explanation here: https://blog.golang.org/pipelines
func merge(done <-chan struct{}, cs ...<-chan idxPair) <-chan idxPair {
    merged := make(chan idxPair)

    var forwarders sync.WaitGroup
    forwarders.Add(len(cs))

    for _, in := range cs {
        go func(in <-chan idxPair) {
            defer forwarders.Done()
            for item := range in {
                select {
                case merged <- item:
                case <-done:
                    return
                }
            }
        }(in)
    }

    // Close the merged channel only after every forwarder has stopped, so
    // that no forwarder can send on a closed channel.
    go func() {
        forwarders.Wait()
        close(merged)
    }()

    return merged
}

我认为这稍微更清洁,而不仅仅是“为此而有所不同”的原因是:

  1. 您可以独立建模和实现每个阶段。 order阶段可以很容易地进行优化,以便在接收到值等时通过通道发送值。
  2. 它更容易组合:与其编写一个对数组中存储的多个通道进行操作的大型方法,不如在各个元素上进行异步工作,并把排序交给另一个可重用的阶段。这样可以促进重用。