为了熟悉 Web Worker,我编写了一个小测试:将两个数组逐元素相加,并把目标数组的各个位置分配给 4 个 worker 分别计算。我本想测量一下性能,结果却大失所望。
///init///
// Four worker slots, and the number of elements held by each of the arrays.
const workers = new Array(4);
const global_elements = 250000;
/**
 * Builds an Int8Array of the requested length with every slot set to `value`.
 *
 * @param {number} value - Number written into each slot.
 * @param {number} elements - Length of the array to allocate.
 * @returns {Int8Array} The newly allocated, filled array.
 */
function createArray(value, elements) {
  const filled = new Int8Array(elements);
  filled.fill(value);
  return filled;
}
// Two operands (all 1s and all 2s), a zeroed destination, and a counter of
// worker replies received so far.
let a = createArray(1, global_elements);
let b = createArray(2, global_elements);
let c = createArray(0, global_elements);
let data_recived = 0;
// Use the prefixed webkitURL when window.URL itself is missing.
window.URL = window.URL || window.webkitURL;
// Create one inline worker per slot of `workers`. Each worker's source is
// generated with the bounds of its own slice baked in, so the slices together
// cover indices 0..global_elements exactly once. (Previously every worker was
// given a range of `global_elements` items starting at i*global_elements, so
// worker 0 processed the whole array and the others indexed past its end.)
for (let i = 0; i < workers.length; ++i) {
  // Ceil + clamp keeps the slices valid even when the element count is not an
  // exact multiple of the number of workers.
  const chunk_size = Math.ceil(global_elements / workers.length);
  const start = Math.min(i * chunk_size, global_elements);
  const end = Math.min(start + chunk_size, global_elements);
  // Worker side: e.data is [a, b, c]. The arrays are looked up once instead of
  // on every iteration. Messages are structured-cloned, so the sums are written
  // into the worker's own copy of `c`; only the completion signal (0) is sent back.
  const response = `self.onmessage=function(e){
const a = e.data[0], b = e.data[1], c = e.data[2];
for(let k=${start}; k<${end}; ++k) {
c[k] = a[k] + b[k];
}
postMessage(0);
}`;
  workers[i] = new Worker(URL.createObjectURL(new Blob([response], {type: 'application/javascript'})));
  // Main-thread side: once every worker has replied, stop the clock and log
  // the elapsed time.
  workers[i].onmessage = function(e) {
    if (++data_recived === workers.length) {
      t1 = performance.now();
      console.log(t1-t0);
    }
  };
}
///end-init///
// Baseline: element-wise addition in a plain loop, timed with performance.now().
let t0 = performance.now();
for (let idx = 0; idx < global_elements; idx += 1) {
  c[idx] = a[idx] + b[idx];
}
let t1 = performance.now();
console.log(t1 - t0);
// Worker run: restart the clock, then post all three arrays to every worker.
t0 = performance.now();
for (const worker of workers) {
  worker.postMessage([a, b, c]);
}
遗憾的是,即使增大 global_elements 的数量,worker 版本也始终没能胜出。
元素数量:普通循环 | worker(耗时,毫秒)
2500:0.1 | 51.4
25000:1.5 | 66.5
250000:4.1 | 182
(我知道性能测试不是最佳的。)
为什么我的 Web Worker 性能出乎意料地差?
答案 0 :(得分:0)
我怀疑性能问题的原因在于向 worker 发送数据时发生的复制。主线程和 worker 之间不共享内存,因此传递消息通常需要克隆要发送的数据。在此处查看更多信息:https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers#Transferring_data_to_and_from_workers_further_details
某些浏览器支持一种名为Transferable Objects的技术,它允许您在不克隆对象的情况下传递对象。您可以在此处详细了解:https://developers.google.com/web/updates/2011/12/Transferable-Objects-Lightning-Fast
答案 1 :(得分:0)
我发现在函数中访问数组的方式对性能有很大影响。通过先把数组保存到局部变量中,我把计算时间减少了一半以上。
任务:2DMatrixMul,元素:250000,worker_count:8
普通方式耗时:1010 毫秒
worker 方式耗时:325 毫秒
对于那些感兴趣的人来说,这就是我的小测试案例。
// Number of workers, the slots that will hold them, and the total element count.
const worker_count = 8;
const workers = new Array(worker_count);
const global_elements = 250000;
// Reply counter plus the two operands (filled with 1) and the result (filled
// with 0); every tensor gets the shape [sqrt(global_elements), sqrt(global_elements)].
// Tensor is not defined in this snippet.
let data_recived = 0;
let a = new Tensor(1, [Math.sqrt(global_elements), Math.sqrt(global_elements)]);
let b = new Tensor(1, [Math.sqrt(global_elements), Math.sqrt(global_elements)]);
let c = new Tensor(0, [Math.sqrt(global_elements), Math.sqrt(global_elements)]);
// Use the prefixed webkitURL when window.URL itself is missing.
window.URL = window.URL || window.webkitURL;
// Create one inline worker per slot. Each worker computes a contiguous chunk of
// the flattened result of a x b and posts that chunk back as a Float32Array.
// The chunk bounds and matrix dimensions are baked into the generated source.
for (let i = 0; i < worker_count; ++i) {
  const chunk = global_elements / worker_count;
  const first = i * chunk;
  // A flat index splits into (row, column) by the number of COLUMNS of the
  // result. The previous code used _dim[0] (rows), which is only equivalent
  // when the result is square.
  const result_cols = c._shape._dim[1];
  const a_cols = a._shape._dim[1];
  const b_cols = b._shape._dim[1];
  // Worker side: e.data is [a values, b values]. For every flat result index,
  // accumulate the dot product of the matching row of a and column of b.
  // The result buffer is transferred rather than cloned on the way back; the
  // worker no longer touches it after posting.
  const response = `
self.onmessage=function(e){
const a = e.data[0], b = e.data[1], tmp = new Float32Array(${chunk});
let idx=${first}, row=0, col=0, m=0, lim=0;
for(; idx<${first + chunk}; ++idx) {
row = Math.floor(idx/${result_cols}), col = idx%${result_cols};
for(m=row*${a_cols}, lim = m+${a_cols}; m<lim; ++m) {
tmp[idx-${first}] += a[m] * b[(m - row*${a_cols})*${b_cols}+col];
}
}
postMessage(tmp, [tmp.buffer]);
}`;
  workers[i] = new Worker(URL.createObjectURL(new Blob([response], {type: 'application/javascript'})));
  // Main-thread side: copy the returned chunk into the result tensor; after the
  // last reply, stop the clock and log the elapsed time and the result.
  workers[i].onmessage = function(e) {
    pass(c._value, e.data, i);
    if (++data_recived === worker_count) {
      t1 = performance.now();
      console.log(t1-t0);
      console.log(c);
    }
  };
}
/**
 * Copies one worker's chunk into its position inside the destination array.
 * The chunk length is global_elements / worker_count, and chunk number `index`
 * starts at index * that length.
 *
 * @param {Object} arr - Destination, written by numeric index.
 * @param {Object} data - Chunk to copy from, read from index 0 upward.
 * @param {number} index - Zero-based chunk number.
 */
function pass(arr, data, index) {
  const chunk = global_elements / worker_count;
  const offset = index * global_elements / worker_count;
  for (let k = 0; k < chunk; ++k) {
    arr[offset + k] = data[k];
  }
}
///end-init///
// Baseline: time a direct call to calculations.matmul2D.
let t0 = performance.now();
calculations.matmul2D(a, b);
let t1 = performance.now();
console.log(t1 - t0);
// Worker run: restart the clock, then post both operands' _value to every worker.
t0 = performance.now();
for (const worker of workers) {
  worker.postMessage([a._value, b._value]);
}
/* Reference copy of the routine timed as the "normal" case above.
calculations.matmul2D( ) looks like this:
static matmul2D(a, b) { //revisite
const row = a._shape._dim[0], column = b._shape._dim[1], a1 = a._shape._dim[1];
let c=0, i=0, m=0, lim=0, len=row * column, result = new Tensor(0, [a._shape._dim[0], column]);
for(; c<len; ++c) {
i = Math.floor(c/row);
for(m=i*a1, lim = m+a1; m<lim; ++m) {
result._value[c] += a._value[m] * b._value[(m - i*a1)*column+(c%row)];
}
}
return result;
}
NOTE(review): the flat index c is split with `row` (Math.floor(c/row), c%row).
That matches the benchmark's square matrices; for a non-square result it
presumably needs `column` instead - confirm against the Tensor layout.
*/