我正在尝试使用 gpu.js 运行一维元胞自动机,但不使用 gpu.js 的代码反而快了约 20 倍,我不明白问题出在哪里。下面的代码从单个处于 ON 状态的单元开始,运行规则 30 元胞自动机。自动机的长度为 2001 个单元。代码分别使用 CPU 和 GPU 计算前 10000 代:CPU 耗时 0.16 秒,而 GPU 耗时 3.2 秒。
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Title Goes Here</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/gpu.js/1.10.4/gpu.min.js"></script>
</head>
<body>
<script>
// Initial generation: CELLS_PER_SIDE OFF cells, a single ON cell in the
// middle, then CELLS_PER_SIDE more OFF cells (2001 cells in total).
// Declared with `let` because the benchmark loops reassign it every step.
const CELLS_PER_SIDE = 1000;
let gen = new Array(2 * CELLS_PER_SIDE + 1).fill(0);
gen[CELLS_PER_SIDE] = 1;
// GPU instance from the gpu.js library loaded in <head>.
const gpu = new GPU();
// GPU kernel computing one generation of the automaton. The output has one
// element per cell of `gen` (see setOutput below); each thread reads the cell
// at its own index plus its left and right neighbours and evaluates the same
// polynomial as kernelCPU, which for 0/1 inputs equals x1 XOR (x2 OR x3).
// NOTE(review): for the first and last thread the neighbour index is -1 and
// x.length respectively, i.e. outside the input array. kernelCPU pads with
// zeros at both ends; what gpu.js returns for these out-of-range reads is not
// visible here, so confirm the edge cells match the CPU version.
const kernel = gpu.createKernel(function(x) {
const x1 = x[this.thread.x-1];
const x2 = x[this.thread.x];
const x3 = x[this.thread.x+1];
return x1+x2+x3-2*x1*x2-x2*x3-2*x1*x3+2*x1*x2*x3;
}).setOutput([gen.length]);
/**
 * Computes the next generation of the one-dimensional automaton on the CPU.
 *
 * For 0/1 cells the polynomial below equals `left XOR (centre OR right)`,
 * i.e. Rule 30. Cells beyond either end of the array are treated as 0.
 * The input array is not modified.
 *
 * @param {ArrayLike<number>} x - Current generation; each cell is 0 or 1.
 * @returns {number[]} A new array with the same length as `x` holding the
 *     next generation.
 */
function kernelCPU(x) {
  const res = [];
  const lastIndex = x.length - 1;
  for (let i = 0; i <= lastIndex; i++) {
    // Out-of-range neighbours count as OFF instead of padding the caller's array.
    const x1 = i > 0 ? x[i - 1] : 0;
    const x2 = x[i];
    const x3 = i < lastIndex ? x[i + 1] : 0;
    res.push(x1 + x2 + x3 - 2 * x1 * x2 - x2 * x3 - 2 * x1 * x3 + 2 * x1 * x2 * x3);
  }
  return res;
}
// Time 10,000 generations computed on the CPU. The timer variables and loop
// counter are block-scoped so they do not leak as globals or clash with the
// identically named variables of the GPU benchmark.
{
  const cpuStart = performance.now();
  for (let step = 0; step < 10000; step++) {
    gen = kernelCPU(gen);
  }
  const cpuElapsedMs = performance.now() - cpuStart;
  console.log("Took " + cpuElapsedMs / 1000 + " seconds for CPU");
}
// Time 10,000 generations computed on the GPU. Every iteration is a separate
// kernel invocation, so any fixed per-call cost is paid 10,000 times.
// The timer variables and loop counter are block-scoped so they do not leak
// as globals or redeclare the ones used by the CPU benchmark.
{
  const gpuStart = performance.now();
  for (let step = 0; step < 10000; step++) {
    gen = kernel(gen);
  }
  const gpuElapsedMs = performance.now() - gpuStart;
  console.log("Took " + gpuElapsedMs / 1000 + " seconds for GPU.");
}
</script>
</body>
</html>
答案 0 :(得分:0)
您的问题出在循环中
// Each iteration invokes the GPU kernel once: 10,000 separate kernel calls.
for (var i = 0; i< 10000; i++) {
gen = kernel(gen);
}
每次调用 GPU 内核函数都会产生不可忽略的固定开销。您只有 2001 个可以并行计算的单元——在付出调用 GPU 内核函数的初始开销之后,这些单元的计算速度确实比在 CPU 上快得多。但是,由于这一调用开销要承担 10,000 次,您付出的代价超过了所获得的全部性能提升。