我在Groovy中写了一个小线程管理器。管理器接受一个数组,一个在数组上执行的任务,以及一个块大小。然后在单独的线程中执行该任务,并将结果添加到结果数组中。
这是类代码:
class ParallelManager {
static def _threads = []
static def _threadsFinishedCorrectly = []
static def _results = []
/**
 * Splits dataArray into chunks of chunkSize elements, runs taskFunc on every
 * chunk in its own thread, waits for all threads and returns the collected
 * per-chunk results.
 *
 * NOTE(review): the threads, results and completion flags are kept in the
 * class-level static lists declared with `[]`. Every worker thread appends to
 * those lists concurrently and without synchronization; this is the line
 * marked "EXCEPTION HERE". The lists are also never cleared in this method, so
 * entries from earlier calls remain in them.
 *
 * @param dataArray collection to process; must support size() and range indexing
 * @param taskFunc  closure invoked once per chunk; must return a non-null value
 * @param chunkSize number of elements per chunk, must be greater than 0
 * @return the static _results list (one entry per finished chunk, in the order
 *         the threads added them), or null when dataArray is empty
 */
static def runParallelTask( def dataArray, def taskFunc, int chunkSize ){
assert chunkSize > 0
assert taskFunc
// Nothing to do for empty input: a bare return yields null.
if (dataArray.size()==0) return
assert dataArray.size() >= 0
def subArray = partitionArray(dataArray, chunkSize)
assert subArray.size() > 0
// Start one thread per chunk and remember it so it can be joined below.
subArray.each{ arrChunk->
_threads.add( Thread.start{
def chunkResults = taskFunc(arrChunk)
assert chunkResults != null
// Concurrent, unsynchronized append to the shared static list.
_results.add(chunkResults) // EXCEPTION HERE
// Only reached when the task returned normally and the append succeeded.
_threadsFinishedCorrectly.add(true)
})
}
// wait for all threads to finish
_threads.each{ it.join() }
// NOTE(review): `log` is not declared in the visible part of this class - confirm where it comes from.
// This message is emitted after the joins above have already returned.
log.info("Waiting for all threads to finish...")
// A thread that died early never added its flag, so the counts differ.
assert _threadsFinishedCorrectly.size() == _threads.size(),'some threads failed.'
assert _results.size() == _threads.size()
log.info("${_threads.size()} finished.")
return _results
}
/**
 * Cuts a list (or array) into consecutive chunks.
 *
 * Every chunk holds exactly `size` elements, except possibly the last one,
 * which receives whatever is left over when the length is not a multiple of
 * `size`. An empty input produces an empty list of chunks.
 *
 * @param array the list or array to cut up; must support size() and range indexing
 * @param size  the number of elements per chunk
 * @return a list of chunks, in the original element order
 */
static def partitionArray(array, size) {
    // Number of chunks that can be filled completely (fraction is truncated).
    int fullChunks = array.size() / size
    def chunks = []
    for (int chunkNo = 0; chunkNo < fullChunks; chunkNo++) {
        def from = chunkNo * size
        chunks << array[from..<(from + size)]
    }
    // Anything after the last full chunk becomes one shorter trailing chunk.
    def hasRemainder = array.size() % size
    if (hasRemainder) {
        chunks << array[(fullChunks * size)..-1]
    }
    return chunks
}
可以像这样调用管理器:
// Task handed to the manager: receives one chunk and returns it with every element doubled.
def parallFunc = { array ->
    log.info "I'm multiplying $array by 2"
    array.collect { element -> element * 2 }
}

// Process eight numbers in chunks of three, one thread per chunk.
def inputData = [1,2,3,4,5,6,7,8]
def results = ParallelManager.runParallelTask(inputData, parallFunc, 3)
此代码偶尔会在上面标记的行中抛出此异常:
Exception in thread "Thread-3" java.lang.ArrayIndexOutOfBoundsException: 1
[java] at java.util.ArrayList.add(ArrayList.java:352)
[java] at java_util_List$add.call(Unknown Source)
你有解决这个问题的办法吗?我认为这样一个小型线程管理器可以帮助很多人加速代码中的常见任务。
干杯, Mulone
答案 0 :(得分:4)
这就是你用GPars做的事情:
@Grab( 'org.codehaus.gpars:gpars:0.12' )
import groovyx.gpars.*
// Values to be doubled.
def arr = [ 1, 2, 3, 4, 5, 6, 7, 8 ]

// Inside withPool the list gains collectParallel, which maps the closure over
// the elements using the pool's threads and returns the mapped list.
arr = GParsPool.withPool {
    arr.collectParallel { value -> value * 2 }
}
答案 1 :(得分:1)
您是否听说过 GPars 项目?它是一个经过验证的库,旨在让多核硬件上的并发编程变得直观。它在集合处理方面非常强大。
我建议你依赖这个库而不是实现你自己的有限版本的简单线程管理器。
答案 2 :(得分:1)
我改用 Vector 代替 ArrayList 解决了这个问题(Vector 的方法是同步的,多个线程可以安全地同时调用 add)。下面是可以正常工作的代码:
/**
 * Small helper that applies a task to a collection in parallel.
 *
 * The input is cut into chunks, every chunk is processed by the task closure
 * in its own thread, and the per-chunk results are flattened into one list
 * whose order follows the order of the input chunks.
 */
class ParallelManager {
    static def log = Logger.getLogger(ParallelManager)

    // java.util.Vector is used because worker threads write to these
    // collections concurrently and Vector's methods are synchronized.
    Vector _threads = new Vector()
    Vector _threadsFinishedCorrectly = new Vector()
    Vector _results = new Vector()

    /**
     * Runs taskFunc on every chunk of dataArray, one thread per chunk.
     *
     * @param dataArray list or array to process; must support size() and range indexing
     * @param chunkSize number of elements per chunk, must be greater than 0
     * @param taskFunc  closure called with one chunk; must return a non-null
     *                  collection with one result per input element
     * @return the flattened results, ordered like the input; an empty list
     *         when dataArray is empty
     */
    def runParallelTasks( def dataArray, int chunkSize, def taskFunc ){
        reset()
        assert chunkSize > 0
        assert taskFunc
        // Empty input: return an empty list so callers always get a collection.
        if (dataArray.size() == 0) return []

        def chunks = partitionArray(dataArray, chunkSize)
        assert chunks.size() > 0

        // Reserve one slot per chunk. Each worker stores its result at the
        // index of its chunk, so the output order does not depend on which
        // thread happens to finish first.
        _results.setSize(chunks.size())
        chunks.eachWithIndex { arrChunk, chunkIndex ->
            _threads.add( Thread.start {
                def chunkResults = taskFunc(arrChunk)
                assert chunkResults != null
                _results.set(chunkIndex, chunkResults)
                // Only reached when the task completed without throwing.
                _threadsFinishedCorrectly.add(true)
            })
        }

        log.debug("Waiting for all threads to finish...")
        _threads.each{ it.join() }

        // A worker that threw never registered its flag and left its slot null.
        assert _threadsFinishedCorrectly.size() == _threads.size(),'some threads failed.'
        assert !_results.contains(null)
        log.debug("${_threads.size()} finished.")

        def res = _results.flatten()
        assert dataArray.size() == res.size(),"Something went wrong. Some threads did not return their results. results=$res"
        return res
    }

    /**
     * Discards the threads, results and completion flags of a previous run.
     */
    void reset(){
        _threads = new Vector()
        _results = new Vector()
        _threadsFinishedCorrectly = new Vector()
    }

    /**
     * Cuts a list or array into consecutive chunks of `size` elements; the
     * last chunk is shorter when the length is not a multiple of `size`.
     *
     * @param array the list or array to cut up
     * @param size  number of elements per chunk, must be greater than 0
     * @return a list of chunks in the original element order
     */
    def partitionArray(array, size) {
        // A zero or negative size would divide by zero or build bogus ranges.
        assert size > 0
        def partitions = []
        int partitionCount = array.size() / size
        partitionCount.times { partitionNumber ->
            def start = partitionNumber * size
            def end = start + size - 1
            partitions << array[start..end]
        }
        // Leftover elements form one final, shorter chunk.
        if (array.size() % size) partitions << array[partitionCount * size..-1]
        return partitions
    }
}
可以像这样调用管理器:
// Task applied to each chunk; its return value becomes that chunk's result.
someClosure = { chunk ->
    doSomethingOn(chunk)
}

// Process inputArray in chunks of 4 elements, one thread per chunk.
def manager = new ParallelManager()
def resultArray = manager.runParallelTasks( inputArray, 4, someClosure )