Question

我无法解释为什么我的性能测试在两种不同的运行方式下会返回显著不同的结果。

重现问题的步骤：

从gist获取代码： https://gist.github.com/AVAVT/83685bfe5280efc7278465f90657b9ea
运行node practice1.generator
运行node practice1.performance-test

practice1.generator应生成test-data.json文件，并将一些搜索算法执行时间记录到控制台中。之后，practice1.performance-test从test-data.json读取，并对相同的数据执行完全相同的评估功能。

我机器上的输出与此类似：

> node practice1.generator
Generate time: 9,307,061,368 nanoseconds
Total time using indexOf             : 7,005,750 nanoseconds
Total time using for loop            : 7,463,967 nanoseconds
Total time using binary search       : 1,741,822 nanoseconds
Total time using interpolation search: 915,532 nanoseconds

> node practice1.performance-test
Total time using indexOf             : 11,574,993 nanoseconds
Total time using for loop            : 8,765,902 nanoseconds
Total time using binary search       : 2,365,598 nanoseconds
Total time using interpolation search: 771,005 nanoseconds

请注意indexOf和binary search与其他算法相比，执行时间的差异。

如果我反复运行node practice1.generator 或 node practice1.performance-test，结果会非常一致。

这让我非常困扰：我无法判断哪些结果是可信的，也不知道为什么会出现这种差异。是由生成的测试数组与经JSON.parse解析得到的测试数组之间的差异引起的，还是由process.hrtime()引起的，或者是某些我甚至无法理解的未知原因？

更新：我已经查明indexOf这一情况的原因在于JSON.parse。在practice1.generator内，tests数组是原始生成的数组；而在practice1.performance-test中，数组是从json文件中读取的，可能与原始数组存在某种差异。

如果在practice1.generator中，我改为用JSON.parse()从字符串解析出一个新数组：

// Round-trip the generated cases through JSON so the benchmark runs on
// arrays that were created by JSON.parse instead of the original arrays.
var tests2 = JSON.parse(JSON.stringify(tests));

performanceUtil.performanceTest(tests2);

现在indexOf的执行时间在两个文件上都是一致的。

> node practice1.generator
Generate time: 9,026,080,466 nanoseconds
Total time using indexOf             : 11,016,420 nanoseconds
Total time using for loop            : 8,534,540 nanoseconds
Total time using binary search       : 1,586,780 nanoseconds
Total time using interpolation search: 742,460 nanoseconds

> node practice1.performance-test
Total time using indexOf             : 11,423,556 nanoseconds
Total time using for loop            : 8,509,602 nanoseconds
Total time using binary search       : 2,303,099 nanoseconds
Total time using interpolation search: 718,723 nanoseconds

所以至少我知道indexOf在原始数组上运行得更快，在经JSON.parse解析得到的数组上运行得更慢。不过我仍然只知道是什么导致了差异，却不知道为什么。

二分搜索的执行时间在两个文件上仍然不同：在practice1.generator中始终需要约1.7ms（即使使用经JSON.parse解析得到的对象时也是如此），而在practice1.performance-test中约为2.3ms。

以下是与要点相同的代码，供将来参考之用。

/*
 * performance-utils.js
 */
'use strict';

/**
 * Benchmarks four ways of locating `target` inside `input`
 * (Array.prototype.indexOf, a manual linear scan, binary search and
 * interpolation search) and logs the total elapsed time of each strategy.
 *
 * Each strategy's result is compared with `testcase.output`; a mismatch is
 * reported through an "Errr" log line. Binary and interpolation search only
 * produce correct indexes when `input` is sorted in ascending order.
 *
 * @param {Array<{input: number[], target: number, output: number}>} tests
 *   Test cases: `input` is searched for `target`; `output` is the expected
 *   index, or -1 when `target` is absent.
 * @returns {void}
 */
const performanceTest = function(tests){
  // Turns a process.hrtime() diff ([seconds, nanoseconds]) into a
  // thousands-separated nanosecond string.
  const formatNanoseconds = ([seconds, nanoseconds]) =>
    (seconds * 1e9 + nanoseconds).toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",");

  // Reports a strategy whose answer differs from the expected index.
  const verify = (result, testcase) => {
    if(result !== testcase.output) console.log("Errr", result, testcase.output);
  };

  let tindexOf = process.hrtime();
  tests.forEach(testcase => {
    const result = testcase.input.indexOf(testcase.target);

    verify(result, testcase);
  });
  tindexOf = process.hrtime(tindexOf);

  let tmanual = process.hrtime();
  tests.forEach(testcase => {
    const arrLen = testcase.input.length;
    let result = -1;
    for(let i=0;i<arrLen;i++){
      if(testcase.input[i] === testcase.target){
        result = i;
        break;
      }
    }

    verify(result, testcase);
  });
  tmanual = process.hrtime(tmanual);

  let tbinary = process.hrtime();
  tests.forEach(testcase => {
    let max = testcase.input.length-1;
    let min = 0;
    let check, num;
    let result = -1;

    // `>=` (not the arrow function `max => min`, which is always truthy):
    // the loop must stop once the window is empty, otherwise a missing
    // target or an empty input never terminates.
    while(max >= min){
      check = Math.floor((max+min)/2);
      num = testcase.input[check];

      if(num === testcase.target){
        result = check;
        break;
      }
      else if(num > testcase.target) max = check-1;
      else min = check+1;
    }

    verify(result, testcase);
  });
  tbinary = process.hrtime(tbinary);


  let tinterpolation = process.hrtime();
  tests.forEach(testcase => {
    let max = testcase.input.length-1;
    let min = 0;
    let result = -1;
    let check, num;

    // Probe where the target would sit if values were evenly distributed
    // between input[min] and input[max].
    while(max > min && testcase.target >= testcase.input[min] && testcase.target <= testcase.input[max]){
      check = min +  Math.round((max-min) * (testcase.target - testcase.input[min]) / (testcase.input[max]-testcase.input[min]));
      num = testcase.input[check];

      if(num === testcase.target){
        result = check;
        break;
      }
      else if(testcase.target > num) min = check + 1;
      else max = check - 1;
    }

    // The loop exits when the window shrinks to one element without testing it.
    if(result === -1 && testcase.input[max] === testcase.target) result = max;

    verify(result, testcase);
  });
  tinterpolation = process.hrtime(tinterpolation);

  console.log(`Total time using indexOf             : ${formatNanoseconds(tindexOf)} nanoseconds`);
  console.log(`Total time using for loop            : ${formatNanoseconds(tmanual)} nanoseconds`);
  console.log(`Total time using binary search       : ${formatNanoseconds(tbinary)} nanoseconds`);
  console.log(`Total time using interpolation search: ${formatNanoseconds(tinterpolation)} nanoseconds`);
}

module.exports = { performanceTest }




/*
 * practice1.generator.js
 */
'use strict';

require('util');
const performanceUtil = require('./performance-utils');
const fs = require('fs');
const path = require('path');
const outputFilePath = path.join(__dirname, process.argv[3] || 'test-data.json');

// Number of test cases to generate: first CLI argument, defaulting to 1000.
// The radix is explicit so the argument is always read as decimal, and the
// fallback is a string because parseInt operates on strings.
const AMOUNT_TO_GENERATE = Number.parseInt(process.argv[2] || '1000', 10);

// Make sure ARRAY_LENGTH_MAX < (MAX_NUMBER - MIN_NUMBER)
const ARRAY_LENGTH_MIN = 10000;
const ARRAY_LENGTH_MAX = 18000;
const MIN_NUMBER = -10000;
const MAX_NUMBER = 10000;

// Every integer from MIN_NUMBER to MAX_NUMBER (inclusive) in ascending order;
// each test case starts from a copy of this pool.
const candidates = Array.from(Array(MAX_NUMBER - MIN_NUMBER + 1), (item, index) => MIN_NUMBER + index);

/**
 * Builds one random test case from the shared `candidates` pool.
 *
 * A copy of the pool is thinned out by removing random elements until it has
 * a random length between ARRAY_LENGTH_MIN and ARRAY_LENGTH_MAX; one more
 * element is then removed and kept as a value that is known to be absent.
 *
 * @returns {{input: number[], target: number, output: number}}
 *   `input` is the array to search, `target` the value to look for and
 *   `output` the expected index (-1 when `target` is the absent value).
 */
function createNewTestcase(){
  const randomIndexBelow = (size) => Math.floor(Math.random()*size);

  const input = candidates.slice();
  const lengthToGenerate = Math.floor(Math.random()*(ARRAY_LENGTH_MAX - ARRAY_LENGTH_MIN + 1)) + ARRAY_LENGTH_MIN;

  // Drop random elements one at a time until the desired length is reached.
  for(; input.length > lengthToGenerate; ){
    input.splice(randomIndexBelow(input.length), 1);
  }

  // Remove one more element to serve as the "not found" target; fall back to
  // a value below the whole range if the pool was shorter than requested.
  let notfound = MIN_NUMBER-1;
  if(input.length === lengthToGenerate){
    notfound = input.splice(randomIndexBelow(input.length), 1)[0];
  }

  // Expected index is uniform over -1 .. input.length-1.
  const output = randomIndexBelow(input.length+1) - 1;
  let target;
  if(output === -1){
    target = notfound;
  }
  else{
    target = input[output];
  }

  return { input: input, target: target, output: output };
}

// Time the whole generation phase, including writing the cases to disk.
const generationStart = process.hrtime();

const tests = [];
for(let generated = 0; generated < AMOUNT_TO_GENERATE; generated++){
  tests.push(createNewTestcase());
}

const serializedTests = JSON.stringify(tests);
fs.writeFileSync(outputFilePath, serializedTests);

const [elapsedSeconds, elapsedNanoseconds] = process.hrtime(generationStart);
const generationTime = elapsedSeconds * 1e9 + elapsedNanoseconds;
console.log(`Generate time: ${generationTime.toString().replace(/\B(?=(\d{3})+(?!\d))/g, ",")} nanoseconds`);

// Benchmark the freshly generated (never serialized) arrays.
performanceUtil.performanceTest(tests);



/*
 * practice1.performance-test.js
 */
'use strict';

require('util');
const performanceUtil = require('./performance-utils');
const fs = require('fs');
const path = require('path');
const outputFilePath = path.join(__dirname, process.argv[2] || 'test-data.json');

// Load the test cases from the JSON file and benchmark the parsed arrays.
const serializedTests = fs.readFileSync(outputFilePath);
const tests = JSON.parse(serializedTests);
performanceUtil.performanceTest(tests);

Answer 1

正如您已经注意到的那样，性能差异归结为这样一个比较：生成的数组 vs 经JSON.parse解析得到的数组。两种情况下我们拥有的都是包含相同数字的相同数组，那么查找性能就一定相同吗？并非如此。

每个Javascript引擎都有多种数据类型结构，用于表示相同的值（数字，对象，数组等）。在大多数情况下，优化器会尝试找出要使用的最佳数据类型，并且还经常为数组生成一些额外的元信息，例如hidden classes或tags。

有几篇关于数据类型的非常好的文章：

那么为什么JSON.parse创建的数组很慢？解析器在创建值时没有正确地优化数据结构，因此我们得到的是包含装箱（boxed）double的未标记（untagged）数组。但我们可以事后使用Array.from对数组进行优化；在您的情况下，这样得到的数组与生成的数组一样，是由smi数字组成的smi数组。以下是基于您的代码的示例。

const fs = require('fs');
const path = require('path');
const outputFilePath = path.join(__dirname, process.argv[2] || 'test-data.json');

// Demo: compares the element-kind checks reported for a JSON.parse-d array,
// a `slice` copy of it and an `Array.from` copy of it.
// The %Has...Elements calls are native syntax, so this script must be run
// with `node --allow-natives-syntax`.
let tests = JSON.parse(fs.readFileSync(outputFilePath));

// for this demo we take only the first items array
var arrSlow = tests[0].input;
// `slice` copies array as-is
var arrSlow2 = tests[0].input.slice();
// array is copied and optimized
var arrFast = Array.from(tests[0].input);

// Each line prints the check for arrFast, arrSlow, arrSlow2 (in that order);
// the `//>` comments show the output reported by the author.
console.log(%HasFastSmiElements(arrFast), %HasFastSmiElements(arrSlow), %HasFastSmiElements(arrSlow2));
//> true, false, false
console.log(%HasFastObjectElements(arrFast), %HasFastObjectElements(arrSlow), %HasFastObjectElements(arrSlow2));
//> false, true, true
console.log(%HasFastDoubleElements(arrFast), %HasFastDoubleElements(arrSlow), %HasFastDoubleElements(arrSlow2));
//> false, false, false

// small numbers and unboxed doubles in action
console.log(%HasFastDoubleElements([Math.pow(2, 31)]));
console.log(%HasFastSmiElements([Math.pow(2, 30)]));

使用 node --allow-natives-syntax test.js 运行它。

Answer 2

好的......首先让我们谈谈测试策略......

多次运行此测试会给每个点带来令人难以置信的不同结果波动...请参阅此处的结果

https://docs.google.com/spreadsheets/d/1Z95GtT85BljpNda4l-usPjNTA5lJtUmmcY7BVB8fFGQ/edit?usp=sharing

测试更新后（连续运行100次测试并计算平均值）我得出执行时间的主要差异是：

indexOf和for循环在GENERATOR场景中工作得更好
二分搜索和插值搜索在JSON解析场景中运行得更好

请先查看google doc ...

好的..很好......这件事比较容易解释......基本上，我们遇到的情况是 RANDOM内存访问（二分搜索、插值搜索）和 CONSECUTIVE内存访问（indexOf、for）给出了不同的结果

嗯。让我们深入了解NodeJS的内存管理模型

首先，NodeJS有几个数组表示，实际上我只知道两个 - numberArray，objectArray（表示可以包含任何类型值的数组）

让我们看看GENERATOR情景：

在初始数组创建期间，NodeJS 能够检测到您的数组仅包含数字，因为数组一开始就只有数字，之后也没有添加任何其他类型的值。这使得它可以使用简单的内存分配策略：内存中就是一个接一个排列的原始整数...

数组在内存中表示为array of raw numbers，很可能只有memory paging table在此处生效

这一事实清楚地解释了为什么 CONSECUTIVE内存访问在这种情况下效果更好。

让我们看一下JSON解析场景：

在JSON解析结构期间，JSON是不可预测的（NodeJS使用流JSON解析器（99.99％置信度）），每个值都被追踪为最适合JSON解析的，所以......

数组在内存中表示为array of references to the numbers，只是因为在解析JSON时这个解决方案在大多数情况下更有效率（并且没有人关心（恶魔））

只要我们通过小块分配堆中的内存，内存就会以更流畅的方式填充

同样在此模型中 RANDOM内存访问可以提供更好的结果，因为NodeJS引擎没有选项 - 为了优化访问时间，它可以创建prefix tree hash map或 RANDOM内存访问方案中的访问时间

这很好地解释了为什么JSON解析场景在二分搜索和插值搜索中胜出。

使用process.hrtime（）的执行时间会返回截然不同的结果

2 个答案: