Current code structure:
func doStuff(serializeds []string) ([]*MyStruct, error) {
    objs := []*MyStruct{}
    for _, s := range serializeds {
        deserializedObject, ok, err := doDeserialization(s)
        if err != nil {
            // if any err, abandon the whole thing
            return nil, err
        }
        if !ok {
            continue
        }
        objs = append(objs, deserializedObject)
    }
    return objs, nil
}
serializeds usually contains 200-1000 serialized strings at a time, and each one takes roughly 0.5-1ms in doDeserialization, so the sequential loop adds up to roughly 100ms-1s per call.
Goal:
Side question: is it okay to spin up one goroutine per serialized string, or is it more efficient to use only a limited number of goroutines (e.g. 50 goroutines)?
Answer 0 (score: 1)
You can create the output slice with the required size up front (the length of serializeds is known in advance) and then have each goroutine fill in the slot at the index it got from the original slice:
wait := new(sync.WaitGroup)
objs := make([]*MyStruct, len(serializeds))
for i, s := range serializeds {
    wait.Add(1)
    go func(j int, s string) {
        defer wait.Done()
        deserializedObject, ok, err := doDeserialization(s)
        if err != nil {
            // add error handling here
            return
        }
        if !ok {
            // nothing to store; objs[j] stays nil
            return
        }
        objs[j] = deserializedObject
    }(i, s)
}
wait.Wait()
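One difference from the original doStuff: entries where ok is false (or where an error was handled) leave a nil at their index instead of being skipped. If the caller expects a dense slice, a short compaction pass after wait.Wait() restores that behaviour. This is only a sketch, assuming nil marks a skipped entry:

// keep only the entries that were actually filled in
compacted := objs[:0]
for _, o := range objs {
    if o != nil {
        compacted = append(compacted, o)
    }
}
objs = compacted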
Regarding your side question: this calls for proper profiling of both implementations of your application. Intuitively, I would guess that Go's goroutine scheduler should handle this efficiently enough without much overhead, and that you probably shouldn't bother with the extra complexity of a goroutine worker pool. Without profiling, though, that is only a best guess.
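If you do want to cap the number of in-flight goroutines (the 50-goroutine variant from the side question), a buffered channel used as a semaphore is a small extension of the code above. This is only a sketch; the limit of 50 and the decision to leave failed entries as nil are assumptions, not part of the answer:

wait := new(sync.WaitGroup)
objs := make([]*MyStruct, len(serializeds))
sem := make(chan struct{}, 50) // at most 50 goroutines do work at the same time

for i, s := range serializeds {
    wait.Add(1)
    sem <- struct{}{} // acquire a slot; blocks while 50 workers are busy
    go func(j int, s string) {
        defer wait.Done()
        defer func() { <-sem }() // release the slot

        deserializedObject, ok, err := doDeserialization(s)
        if err != nil || !ok {
            // leave objs[j] as nil; collect the error however suits you
            return
        }
        objs[j] = deserializedObject
    }(i, s)
}
wait.Wait()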
Answer 1 (score: 0)
Your problem calls for concurrent processing followed by a sequential read. @helmbert's solution is simple and elegant, and with a few more tweaks it will solve your problem.
However, you can also perform controlled concurrent execution followed by a sequential read with the procedure below. It is not very clean, but I have used something like this before and it worked fine. (Note that the code below almost certainly has bugs, so be careful.)
objs := []*MyStruct{}

// concurrency factor. This controls the number of goroutines
// you'll be running. (The number of goroutines can also be cf+1
// if len(serializeds) % cf != 0.)
cf := 3

// length of the sub-slice each goroutine will be processing
// (len(serializeds) should be >= cf, otherwise subSliceLen is 0)
subSliceLen := len(serializeds) / cf

// buffered channel to collect errors from the goroutines without blocking them
errorCh := make(chan error, cf+1)

// chans will store the created channels in the proper order
chans := make([]chan *MyStruct, 0, cf+1)

// quit channel to signal the currently executing goroutines to stop;
// closeQuit makes sure it is closed only once even if several goroutines fail
quit := make(chan struct{})
var closeQuit sync.Once

// loop so that each goroutine only reads a part of the original input slice
for i := 0; i < len(serializeds); i += subSliceLen {
    // set up the slice sub-section to be processed
    hi := i + subSliceLen
    if hi > len(serializeds) {
        hi = len(serializeds)
    }

    // create a channel for the goroutine that will handle the
    // input slice values from i to hi. It is important to make
    // these channels buffered, otherwise there will be no possibility
    // of parallel execution.
    ch := make(chan *MyStruct, subSliceLen)
    chans = append(chans, ch)

    go func(ch chan *MyStruct, i, hi int) {
        defer close(ch)
        for _, s := range serializeds[i:hi] {
            deserializedObject, ok, err := doDeserialization(s)
            if err != nil {
                // if any err, abandon the whole thing
                errorCh <- err
                // signal the other goroutines that they should
                // stop the work and return
                closeQuit.Do(func() { close(quit) })
                return
            }
            if !ok {
                continue
            }
            select {
            // this is required in order to receive the signal that
            // some other goroutine has encountered an error and that
            // this goroutine should also cleanly return. Without this
            // there would be a goroutine leak.
            case <-quit:
                return
            case ch <- deserializedObject:
                // do nothing
            }
        }
    }(ch, i, hi)
}
chans now holds all the channels that are receiving the processed data, so we can start reading from them sequentially.
for i := 0; i < len(chans); {
    select {
    case v, ok := <-chans[i]:
        if !ok {
            // chans[i] is closed and has been read completely,
            // so proceed to the next channel
            i++
            continue
        }
        objs = append(objs, v)
    case err := <-errorCh:
        // handle the error; all goroutines have been signalled to
        // stop via quit, so abandon the whole thing
        return nil, err
    }
}

// the select above may have drained every channel before picking up the
// error, so do a final non-blocking check
select {
case err := <-errorCh:
    return nil, err
default:
}
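As a follow-up to both answers, the same requirements (bounded concurrency, abandon everything on the first error, keep only the objects that deserialized successfully) can also be expressed with the golang.org/x/sync/errgroup package. This is only a sketch under the assumption that pulling in x/sync is acceptable; the limit of 50 goroutines and the wrapper name doStuffBounded are illustrative, not from either answer:

import (
    "context"

    "golang.org/x/sync/errgroup"
)

func doStuffBounded(ctx context.Context, serializeds []string) ([]*MyStruct, error) {
    g, ctx := errgroup.WithContext(ctx)
    g.SetLimit(50) // at most 50 deserializations run concurrently

    // index-aligned with serializeds; entries that fail the ok check stay nil
    results := make([]*MyStruct, len(serializeds))

    for i, s := range serializeds {
        i, s := i, s // capture loop variables (needed before Go 1.22)
        g.Go(func() error {
            // skip the work if another goroutine has already failed
            select {
            case <-ctx.Done():
                return ctx.Err()
            default:
            }
            obj, ok, err := doDeserialization(s)
            if err != nil {
                return err // the first error cancels ctx and is returned by Wait
            }
            if ok {
                results[i] = obj
            }
            return nil
        })
    }

    if err := g.Wait(); err != nil {
        return nil, err
    }

    // drop the nil gaps so the result matches the original doStuff semantics
    objs := make([]*MyStruct, 0, len(results))
    for _, o := range results {
        if o != nil {
            objs = append(objs, o)
        }
    }
    return objs, nil
}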