This is a specific question. I recently tested gpu.js, a library that is supposed to speed up computations by parallelizing them through WebGL. I ran a quick test:
var gpu = new GPU();

function product(v, u) {
    return gpu.createKernel(function(X, Y) {
        return X[this.thread.x] * Y[this.thread.x];
    }).dimensions([v.length])(v, u);
}

var before = new Date().getTime();
console.log(product(numeric.random([100000]), numeric.random([100000])).length);
console.log('Parallel Time: ', (new Date().getTime()) - before);

before = new Date().getTime();
v = numeric.random([100000]);
u = numeric.random([100000]);
for (var i = 0; i < v.length; i++) {
    v[i] = v[i] * u[i];
}
console.log(v.length);
console.log('Procedural Time: ', (new Date().getTime()) - before);
I got the following output:
script.js:11 100000
script.js:12 Parallel Time: 340
script.js:20 100000
script.js:21 Procedural Time: 15
The parallel version is an order of magnitude slower. Is there a reason for this? I tried it on several machines with different GPUs, and I also tried a few similar operations. Am I doing something wrong, or is it a problem with the library? Is there any way to improve this?
Answer 0 (score: 2)
When working with a GPU, you have to be aware of overhead.

Calls to gpu.createKernel can be very expensive, because the library has to parse your JavaScript code, generate the corresponding GLSL, and send it to WebGL to be compiled and linked. At the very least, you will want to call that once and store the result in a global variable so it can be reused on every call to product.

It is also worth noting that the work required to move data onto and off the GPU is non-zero, so you will see more of a gain with more complex computations.
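A minimal sketch of that advice, reusing the kernel body from the question (note the output size is fixed when the kernel is built, so it would have to be rebuilt if the input length changes):

// Build the kernel once: parsing the JavaScript and compiling the GLSL happen here.
var productKernel = gpu.createKernel(function(X, Y) {
    return X[this.thread.x] * Y[this.thread.x];
}).dimensions([100000]);

function product(v, u) {
    // Each call now only pays for data transfer and the kernel launch.
    return productKernel(v, u);
}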
Answer 1 (score: 0)

Use:
t0 = performance.now();
yourFunctionCall();
t1 = performance.now();
console.log("Function yourFunctionCall took " + (t1 - t0) + " ms.");
Not sure if this is at the heart of the problem, but I have also had trouble with Date-based timing.
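For example, applied to the product() call from the question (just a sketch; performance.now() gives sub-millisecond resolution, unlike Date):

var t0 = performance.now();
var result = product(numeric.random([100000]), numeric.random([100000]));
var t1 = performance.now();
console.log('Parallel Time: ' + (t1 - t0) + ' ms for ' + result.length + ' elements');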
Answer 2 (score: 0)

I combed through the source of their benchmarks and found that you only get a speedup when you run a large number of operations in a row, so I think it is an overhead issue. I put together the following very simple benchmark comparing gpu.js with numeric.js, in case anyone is interested:
var gpu = new GPU();

var size = 512;
var scale = 10;
var iterations = 100;

// Scaling up the matrices decreases the effect of precision errors
A = numeric.mul(numeric.random([size, size]), scale);
B = numeric.mul(numeric.random([size, size]), scale);

// I know eval is dangerous, but I couldn't get the size into the kernel any other way
function multGen(size) {
    return eval("(function(A, B) { var sum = 0; for (var i = 0; i < " + size + "; i++) { sum += A[this.thread.y][i] * B[i][this.thread.x]; } return sum; })");
}

var mat_mult = gpu.createKernel(multGen(size)).dimensions([size, size]);

var before = new Date().getTime();
var parallel = mat_mult(A, B);
// Need to do many computations to get the advantages of the GPU
for (var i = 0; i < iterations; i++) {
    parallel = mat_mult(A, B);
}
var parTime = (new Date().getTime()) - before;
console.log('Parallel Time: ', parTime);

before = new Date().getTime();
var procedural = numeric.dot(A, B);
// Repeat the same number of computations on the CPU for comparison
for (var i = 0; i < iterations; i++) {
    procedural = numeric.dot(A, B);
}
var procTime = (new Date().getTime()) - before;
console.log('Procedural Time: ', procTime);
console.log((procTime / parTime) + ' times faster');

// This is for RMSD normalization; flattening and taking min and max that way exceeded the call stack
var max = Math.max(Math.max(...A.map(function(row) { return Math.max(...row); })), Math.max(...B.map(function(row) { return Math.max(...row); })));
var min = Math.min(Math.min(...A.map(function(row) { return Math.min(...row); })), Math.min(...B.map(function(row) { return Math.min(...row); })));

// The matrices will differ due to precision issues, so the normalized RMSD gives an idea of the difference
var nrmsd = Math.sqrt(numeric.sum(numeric.pow(numeric.sub(parallel, procedural), 2)) / size) / (max - min);
console.log('Normalized RMSD: ', nrmsd);
This gave me the following output:
scriptfour.js:26 Parallel Time: 20490
scriptfour.js:36 Procedural Time: 28736
scriptfour.js:38 1.402440214738897 times faster
scriptfour.js:48 Normalized RMSD: 0.009671934749138042
These results are pretty good. The eval unfairly slows down the parallel version, yet it still comes out faster every time. I don't think a setup like this is suitable for production, but it works well enough here.
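As an aside on the eval trick: newer releases of gpu.js let you pass compile-time constants into a kernel, which removes the need for eval entirely. A rough sketch, assuming a gpu.js version that supports the constants and output kernel settings (the API differs from the .dimensions() call used above, so check the docs for the version you have installed):

// Hypothetical rewrite of multGen using kernel constants instead of eval.
// Assumes a gpu.js version where createKernel accepts a settings object
// with `constants` and `output`; older versions may not support this.
var size = 512;
var mat_mult = gpu.createKernel(function(A, B) {
    var sum = 0;
    for (var i = 0; i < this.constants.size; i++) {
        sum += A[this.thread.y][i] * B[i][this.thread.x];
    }
    return sum;
}, {
    constants: { size: size },
    output: [size, size]
});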