Question

我是 Go 新手，不过有一个想要尝试的想法。

我希望有一个 goroutine 从通道接收字符串，但只有在收到 N 个字符串之后，才对它们进行处理。

我搜索过类似的问题或案例，但只找到了并行运行多个 goroutine 并等待汇总结果的那类问题。

我想过先创建一个数组，等它足够长时再传给 goroutine。但是，我想保持一定的关注点分离，并在接收端进行控制。

我的问题是

这是出于某种原因的不良做法吗？

有没有更好的方法可以做到这一点？

// main demonstrates batching on the receiving side: a goroutine collects
// strings from ch and only processes them once batchSize of them have arrived.
func main() {
    // batchSize is the number of messages to collect before processing them.
    const batchSize = 3

    ch := make(chan string)
    go func() {
        tasks := make([]string, 0, batchSize)
        for task := range ch {
            tasks = append(tasks, task)

            // Not enough messages yet: keep collecting.
            if len(tasks) < batchSize {
                fmt.Println("Queue still to small")
                continue
            }

            // A full batch has arrived: process it in arrival order.
            for _, t := range tasks {
                fmt.Println(t)
            }

            // Start the next batch empty (reusing the backing array) so that
            // already-processed messages are not printed again.
            tasks = tasks[:0]
        }
    }()

    // Each sleep spaces the sends out; the last one delays the return of main
    // so the goroutine has time to handle the final message.
    ch <- "Msg 1"
    time.Sleep(time.Second)
    ch <- "Msg 2"
    time.Sleep(time.Second)
    ch <- "Msg 3"
    time.Sleep(time.Second)
    ch <- "Msg 4"
    time.Sleep(time.Second)
}

编辑：已改为更简单、更准确的示例。

Answer 1

基于一些评论，您似乎正在寻找某种形式的批处理。

分批处理有几种情况，您需要分批处理并一起发送：

批次已达到足够的大小
已经过了一定时间，应刷新（flush）尚未填满的批次

您给出的示例没有考虑第二种情况。如果因为不再收到数据而永远不刷新，可能会导致一些奇怪的行为。

因此，我建议您浏览一个库（例如cloudfoundry/go-batching）或只使用通道，一个Timer和一个select语句。

package main

import (
    "fmt"
    "time"
)

// main sends four messages, one per second, to a batching goroutine. The
// goroutine flushes its pending messages either once three have accumulated
// or when its one-second timer fires.
func main() {
    messages := make(chan string)

    go func() {
        var pending []string
        idle := time.NewTimer(time.Second) // Adjust this based on a reasonable user experience
        for {
            select {
            case msg := <-messages:
                pending = append(pending, msg)

                // Restart the countdown on every message so that a partial
                // batch is only flushed once messages stop arriving. If the
                // timer could not be stopped, drain its channel before
                // resetting it.
                if stopped := idle.Stop(); !stopped {
                    <-idle.C
                }
                idle.Reset(time.Second)

                // Flush on a full batch; otherwise keep collecting.
                if len(pending) >= 3 {
                    flush(pending)
                    pending = nil // start a fresh batch
                } else {
                    fmt.Println("Queue still to small")
                }

            case <-idle.C:
                // The timer fired: flush whatever is pending (flush ignores
                // an empty batch) and re-arm the timer.
                fmt.Println("Flush partial batch due to time")
                flush(pending)
                pending = nil
                idle.Reset(time.Second)
            }
        }
    }()

    // Send the demo messages one second apart; the sleep after the last one
    // delays the return of main.
    for _, msg := range []string{"Msg 1", "Msg 2", "Msg 3", "Msg 4"} {
        messages <- msg
        time.Sleep(time.Second)
    }
}

// flush prints a "Flush" header followed by every task on its own line.
// An empty (or nil) batch prints nothing at all.
func flush(tasks []string) {
    if len(tasks) == 0 {
        return
    }

    // Emit the header and the tasks through a single loop.
    lines := append([]string{"Flush"}, tasks...)
    for i := 0; i < len(lines); i++ {
        fmt.Println(lines[i])
    }
}

Answer 2

我能理解批处理结果的用处，但这确实需要自定义的解决方案。解决这个问题的方法有很多：我尝试过使用 sync.WaitGroup，但很麻烦；用 sync.Mutex 锁住批处理函数似乎是最好的办法。不过在我看来，当互斥锁成为最佳答案时，就应该重新审视一下设计，因为我认为互斥锁应当是最后的选择。

package main

import (
    "context"
    "fmt"
    "sync"
    "sync/atomic"
)

// main writes ten strings into an Accumulator created with a batch size of 4
// from a producer goroutine, cancels the context once all writes are done,
// and prints every batch received from the accumulator's output channel.
func main() {
    ctx, cancel := context.WithCancel(context.Background())
    acc := NewAccumulator(4, ctx)

    // Producer: perform the ten writes, then cancel the context that was
    // handed to the accumulator.
    go func() {
        for remaining := 10; remaining > 0; remaining-- {
            acc.Write("hi")
        }
        cancel()
    }()

    // Consumer: print batches until the output channel is closed.
    for batch := range acc.ReadChan() {
        fmt.Println(batch)
    }
    fmt.Println("done")
}

// Accumulator collects the strings passed to Write and publishes them, in
// arrival order, as batches on the channel returned by ReadChan. A batch is
// published as soon as size strings are pending. When the context given to
// NewAccumulator is cancelled, any partial batch is published and the output
// channel is closed.
type Accumulator struct {
    count    int64           // number of strings buffered in `in`; modified only with mu held
    size     int             // batch size, always >= 1
    in       chan string     // FIFO buffer (capacity size) holding the current batch
    out      chan []string   // completed batches, consumed via ReadChan
    ctx      context.Context // its cancellation shuts the accumulator down
    doneFlag int64           // non-zero once shut down; accessed atomically
    mu       sync.Mutex      // serialises Write, batch and the shutdown sequence
}

// NewAccumulator returns an Accumulator that emits batches of size strings and
// shuts down when parentCtx is cancelled. A size smaller than 1 is treated as
// 1, so every write becomes its own batch.
//
// The goroutine started here ends only after parentCtx is cancelled and the
// final batch (if any) has been accepted by the output channel.
func NewAccumulator(size int, parentCtx context.Context) *Accumulator {
    if size < 1 {
        size = 1
    }
    a := &Accumulator{
        size: size,
        in:   make(chan string, size),
        out:  make(chan []string, 1),
        ctx:  parentCtx,
    }

    go func() {
        <-a.ctx.Done()

        // The whole shutdown runs under mu so that a concurrent Write either
        // completes before it or observes doneFlag; it can never send on the
        // already-closed `in` channel.
        a.mu.Lock()
        defer a.mu.Unlock()
        atomic.StoreInt64(&a.doneFlag, 1)
        close(a.in)
        a.batch() // publish the partial batch, if there is one
        close(a.out)
    }()
    return a
}

// Write adds s to the current batch and publishes the batch once it holds
// size strings. It blocks while the output channel is full, i.e. until the
// consumer has read the previously published batch.
//
// Write panics with "write to closed accumulator" when called after the
// accumulator has shut down.
func (a *Accumulator) Write(s string) {
    a.mu.Lock()
    defer a.mu.Unlock()

    if atomic.LoadInt64(&a.doneFlag) > 0 {
        panic("write to closed accumulator")
    }

    // This send cannot block: with mu held there are always fewer than size
    // strings buffered at this point, and `in` has capacity size.
    a.in <- s
    if atomic.AddInt64(&a.count, 1) >= int64(a.size) {
        a.batch()
    }
}

// batch moves every string currently buffered in `in` into a new slice and
// sends that slice on `out`. It does nothing when no strings are pending, so
// an empty batch is never published. The caller must hold a.mu.
func (a *Accumulator) batch() {
    pending := int(atomic.LoadInt64(&a.count))
    if pending == 0 {
        return
    }

    // Exactly `pending` strings are buffered, so these receives never block
    // and never yield the zero value of a closed channel. Empty strings that
    // were written are therefore kept.
    batch := make([]string, 0, pending)
    for i := 0; i < pending; i++ {
        batch = append(batch, <-a.in)
    }
    fmt.Println("batching", batch)
    a.out <- batch
    atomic.StoreInt64(&a.count, 0)
}

// ReadChan returns the receive-only channel on which batches are published.
// The channel is closed after the context passed to NewAccumulator has been
// cancelled and the final batch has been delivered.
func (a *Accumulator) ReadChan() <-chan []string {
    return a.out
}

最好是用一个切片来存储字符串，当该切片达到某个大小时，再开始进行处理。

等待通道中的N个项目，然后顺序执行

2 个答案: