Question

为了量化 C 风格数组与 C++ 中 vector 之间的性能差异，我写了这个小程序：https://github.com/rajatkhanduja/Benchmarks/blob/master/C%2B%2B/vectorVsArray.cpp

为了比较它们的共同点，我决定对随机和顺序访问进行测试。我添加了迭代器，只是为了比较它们（但这不是问题所关注的）。

在一台具有 7.7 GB 内存的 64 位 Linux 计算机上，数组/向量大小为 100 万时的结果如下：

写入数组所需的时间：12.0378 ms
顺序读取数组所需的时间：2.48413 ms
随机读取数组所需的时间：37.3931 ms
写入动态数组所需的时间：11.7458 ms
顺序读取动态数组所需的时间：2.85107 ms
随机读取动态数组所需的时间：36.0579 ms
使用索引写入向量所需的时间：11.3909 ms
使用索引顺序读取向量所需的时间：4.09106 ms
使用索引随机读取向量所需的时间：39 ms
使用迭代器写入向量所需的时间：24.9949 ms
使用迭代器读取向量所需的时间：18.8049 ms

向量的大小在初始化时设定，之后不再改变，因此不会发生重新分配（程序中的断言用于验证这一点）。计时不包括静态分配的数组、动态分配的数组或向量的初始化时间。

根据统计数据，写入向量的时间小于写入数组的时间，但读取向量的时间是读取数组的两倍。

差异很小，但有没有办法解释为什么会存在性能差异？是测试本身有问题吗？我原本预期两者的运行速度相同。重复运行该测试也显示出相同的趋势。

代码：

#include <sys/time.h>

#include <cassert>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>

/* Number of elements in every array and vector exercised by the benchmark. */
#define ARR_SIZE 1000000

using std::string;

/* Prints the label `str` followed by the time elapsed between the `start`
 * and `end` samples, in milliseconds. Defined after main(). */
void printtime (struct timeval& start, struct timeval& end, string str);   

/*
 * Benchmark driver.
 *
 * Times writes, sequential reads and random reads of ARR_SIZE ints for
 *   1. an automatic (stack) C array,
 *   2. a calloc()-allocated array,
 *   3. a std::vector accessed through operator[],
 * and times writes and sequential reads of a second std::vector accessed
 * through iterators. Each measurement is bracketed by gettimeofday() calls
 * and reported through printtime().
 *
 * Returns 0 on success, EXIT_FAILURE if the dynamic array cannot be
 * allocated.
 */
int main (void)
{
  /* NOTE: this places ARR_SIZE ints in automatic storage; the process stack
   * limit must be large enough to hold it. */
  int arr[ARR_SIZE];
  int tmp;
  struct timeval start, stop;

  srand (time (NULL));

  /* Writing data to array */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    arr[i] = rand();
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to write to array."));

  /* Reading data from array sequentially */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    tmp = arr[i];
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to read from array sequentially."));

  /* Reading data from array randomly (the rand() call is inside the timed loop) */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    tmp = arr[rand() % ARR_SIZE];
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to read from array randomly."));


  /* calloc takes (element count, element size); check the result before use. */
  int *darr = (int *) calloc (ARR_SIZE, sizeof (int));
  if (darr == NULL)
  {
    std::cerr << "Failed to allocate the dynamic array." << std::endl;
    return EXIT_FAILURE;
  }

  /* Writing data to dynamic array */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    darr[i] = rand();
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to write to dynamic array."));

  /* Reading data from dynamic array sequentially */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    tmp = darr[i];
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to read from dynamic array sequentially."));

  /* Reading data from dynamic array randomly */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    tmp = darr[rand() % ARR_SIZE];
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to read from dynamic array randomly."));

  std::vector<int> v(ARR_SIZE);
  assert (v.capacity() == ARR_SIZE);

  /* Writing to vector using indices */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    v[i] = rand();
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to write to vector using indices."));
  /* The capacity must be unchanged, i.e. no reallocation happened while writing. */
  assert (v.capacity() == ARR_SIZE);

  /* Reading from vector using indices, sequentially */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    tmp = v[i];
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to read from vector using indices, sequentially."));

  /* Reading from vector using indices, randomly */
  gettimeofday (&start, NULL);
  for (int i = 0; i < ARR_SIZE; i++)
  {
    tmp = v[rand() % ARR_SIZE];
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to read from vector using indices, randomly."));

  std::vector<int> v2(ARR_SIZE);

  /* Writing to vector using iterators */
  gettimeofday (&start, NULL);
  std::vector<int>::iterator itr, itr_end;
  for (itr = v2.begin(), itr_end = v2.end(); itr != itr_end; itr++)
  {
    *itr = rand();
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to write to vector using iterators."));


  /* Reading from vector using iterators */
  gettimeofday (&start, NULL);
  for (itr = v2.begin(), itr_end = v2.end(); itr != itr_end; itr++)
  {
    tmp = *itr;
  }
  gettimeofday (&stop, NULL);
  printtime (start, stop, string ("Time taken to read from vector using iterators."));

  /* tmp only serves as the destination of the timed reads. */
  (void) tmp;

  /* Release the calloc()-allocated array; the original never freed it. */
  free (darr);

  return 0;
}

/**
 * Prints the time elapsed between two gettimeofday() samples.
 *
 * Output format: "<str> : <milliseconds> ms" followed by a newline (flushed).
 *
 * The seconds and microseconds fields are subtracted first and only then
 * scaled to milliseconds. Converting each absolute timestamp to milliseconds
 * before subtracting produces values around 1e12, where a double can no
 * longer represent individual microseconds, and multiplies tv_sec in integer
 * arithmetic, which can overflow where time_t is 32 bits wide.
 *
 * @param start sample taken before the timed region
 * @param end   sample taken after the timed region
 * @param str   label printed in front of the elapsed time
 */
void printtime (struct timeval& start, struct timeval& end, std::string str)
{
  /* tv_usec is signed, so a negative microsecond difference is handled
   * correctly by the addition below. */
  const double diff = (end.tv_sec - start.tv_sec) * 1000.0
                    + (end.tv_usec - start.tv_usec) / 1000.0;

  std::cout << str << " : " << diff << " ms" << std::endl;
}

编辑

正如评论中所建议的，这里有更多信息： -

编译器： - g ++ - 4.5.2
标志： - 无（即默认值）
优化： - 无（我想测试通常设置下的行为。优化可能会改变程序的行为，例如，由于变量 tmp 从未被使用，读取向量/数组的步骤可能被完全跳过，或者被简化为最后一次赋值。至少我是这样理解的。）

Answer 1

这当然不是一个确定的答案，但你是在循环中反复写入同一个变量，这意味着编译器可以很容易地推断出顺序读取的最终结果，从而把整个循环优化掉。既然它显然没有这样做，我推测编译时没有开启优化，而这肯定对迭代器方式不利。其他数字彼此太接近，无法得出结论。

C ++ Array vs Vector性能测试说明

1 个答案: