如何让我自己实现的 std::vector 更快?

时间:2014-09-29 13:17:37

标签: c++ performance c++11 vector

为了学习 C++,我正在尝试自己实现一个 std::vector,但我的实现比 std::vector 慢(参见下面的输出)。

我想请教各位 C++ 专家该如何改进它。我看过这个问题(Why is std::vector so fast ( or is my implementation is too slow )),但由于那位提问者使用了错误的数据结构,他的问题对我没有帮助。

我想问的是:怎样才能让我的实现比 std::vector 更快。

vector.h

/// Minimal fixed-size array of T backed by a heap buffer allocated with new[].
///
/// The object owns its buffer: it is released in the destructor, and copying
/// is disabled so that two objects can never end up deleting the same buffer.
/// Member functions are declared here and defined out of line.
template <typename T>
class Vector {
public:
    /// Creates a vector of n value-initialized elements.
    explicit Vector(const int n);
    /// Creates a vector of n elements, each assigned a copy of val.
    explicit Vector(const int n, const T& val);
    /// Releases the buffer obtained with new[] in the constructors.
    ~Vector() { delete[] arr; }
    // A member-wise copy would share `arr` between two owners and lead to a
    // double delete, so copy construction and copy assignment are forbidden.
    Vector(const Vector&) = delete;
    Vector& operator=(const Vector&) = delete;
    /// Returns a reference to element i; i is not range-checked.
    T& operator[](const int i);
    /// Returns the number of elements.
    inline int const length();
    /// Assigns val to every element.
    inline void fill(const T& val);
    /// Returns the sum of all elements, by value.
    T sum();
private:
    T* arr;   // owned element buffer
    int len;  // number of elements in arr
};

vector.cpp

#include "vector.h"
#include <iostream>
#include <algorithm>

using namespace std;

// Overwrites every element of the buffer with a copy of val.
template <typename T>
inline void Vector<T>::fill(const T& val)
{
    // Walk the buffer by pointer, from the first element to one past the last.
    T* const last = arr + len;
    for (T* cur = arr; cur < last; ++cur) {
        *cur = val;
    }
}

// Returns the sum of all elements, accumulated in index order starting from 0.
//
// The result is returned by value: a T& return type would hand the caller a
// reference to the local accumulator, which no longer exists once the
// function has returned.
// The function is deliberately not marked inline, so the explicit
// instantiations in this file provide a definition other files can link to.
template <typename T>
T Vector<T>::sum()
{
    T total = 0;
    for (int i = 0; i < len; ++i) {
        total += arr[i];
    }
    return total;
}

// Builds a vector of n elements. The trailing () on the new-expression
// value-initializes each element.
template <typename T>
Vector<T>::Vector(const int n)
    : arr(new T[n]()),
      len(n)
{
    // cout << "Vector(n)" << '\n';  // debug trace, left disabled
}

// Builds a vector of n elements and assigns a copy of val to each of them.
template <typename T>
Vector<T>::Vector(const int n, const T& val)
    : arr(new T[n]),
      len(n)
{
    // cout << "Vector(n, val)" << '\n';  // debug trace, left disabled
    // Same element-by-element assignment as before, via the fill() member.
    this->fill(val);
}

// Returns a mutable reference to the element at position i.
// No bounds check is performed on i.
template <typename T>
T& Vector<T>::operator[](const int i)
{
    T* const slot = arr + i;
    return *slot;
}

// Returns the stored element count (len).
template <typename T>
int const Vector<T>::length()
{
    const int count = this->len;
    return count;
}

// Explicit instantiation definitions: instantiate Vector for int and float in
// this translation unit, since the member definitions above live in this file
// rather than in vector.h.
template class Vector<int>;
template class Vector<float>;

vector_test.cpp

#include "vector.h"
#include <iostream>
#include <chrono>
#include <vector>

using namespace std;

int main() 
{
    const int n = 2000000;
    float sum = 0;
    chrono::steady_clock::time_point start = chrono::steady_clock::now();   
    Vector<float> vec(n, 1);
    sum = vec.sum();
    chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
    cout << "my vec sum = " << sum << '\n';
    cout << "my vec impl took " << chrono::duration_cast<chrono::microseconds>(end - start).count()
              << "us.\n";

    sum = 0;
    start = chrono::steady_clock::now();
    vector<float> vec2(n, 1);
    for (int i = 0; i < n; ++i) {
        sum += vec2[i];
    }
    end = std::chrono::steady_clock::now();
    cout << "std::vec sum = " << sum << '\n';
    cout << "stl::vec impl took " << chrono::duration_cast<chrono::microseconds>(end - start).count()
              << "us.\n";
}

输出:

my vec sum = 2e+06
my vec impl took 11040us.
std::vec sum = 2e+06
stl::vec impl took 8034us.

1 个答案:

答案 0 :(得分:1)

这是非常朴素的写法,因为每次迭代都要根据下标重新计算元素地址(只能寄希望于优化器把它优化掉):

// Index-based form: each iteration assigns val through arr[i].
for (int i = 0; i < len; ++i) {
    arr[i] = val;
}

这是一个更好的方法:

// Pointer-based form: ptr starts at the first element and advances until it
// reaches end, which points one past the last of the len elements.
T* ptr = arr;
T* end = ptr + len;
while ( ptr < end ) *ptr++ = val;

然而,一个好的编译器确实会进行这种转换。

同样的思路也适用于 sum():

// Pointer-walking variant of sum(): adds up the len elements starting at arr
// and returns the total by value.
template <typename T> inline T Vector<T>::sum()
{
    T* const stop = arr + len;  // one past the last element
    T total = 0;

    for (T* cur = arr; cur < stop; ++cur) {
        total += *cur;
    }

    return total;
}