我正在尝试自己实现 std::vector 来学习 C++,但我的实现比 std::vector 慢(参见下面的输出)。
我想请 C++ 专家指点一下该如何改进它。我看过这个问题(Why is std::vector so fast ( or is my implementation is too slow )),但那位提问者使用了错误的数据结构,所以那个问题对我没有帮助。
我想问的是:怎样才能让我的实现比 std::vector 更快?
vector.h
/// Fixed-length array of T that owns a heap buffer obtained with new[].
///
/// Only the destructor is defined in the class body; every other member is
/// declared here and defined out of line.
template <typename T>
class Vector {
public:
    /// Creates a vector holding n elements.
    explicit Vector(const int n);
    /// Creates a vector holding n elements, each assigned from val.
    explicit Vector(const int n, const T& val);

    /// Releases the buffer; without this every Vector leaked its storage.
    ~Vector() { delete[] arr; }

    // The object owns `arr`, so a member-wise copy would leave two objects
    // deleting the same buffer. Copying is disabled rather than left unsafe.
    Vector(const Vector&) = delete;
    Vector& operator=(const Vector&) = delete;

    /// Unchecked access to element i.
    T& operator[](const int i);
    /// Number of elements.
    inline int const length();
    /// Assigns val to every element.
    inline void fill(const T& val);
    /// Sum of all elements, returned by value. It was previously defined and
    /// called without ever being declared in the class.
    T sum();

private:
    T* arr;   // owned buffer of `len` elements
    int len;  // element count fixed at construction
};
vector.cpp
#include "vector.h"
#include <iostream>
#include <algorithm>
using namespace std;
// Overwrites every one of the len elements with a copy of val.
template <typename T>
inline void Vector<T>::fill(const T& val)
{
    // Qualified on purpose: an unqualified fill(...) inside this member
    // would name Vector<T>::fill itself rather than the algorithm.
    std::fill(arr, arr + len, val);
}
// Returns the sum of all len elements, accumulated front to back in a T.
//
// The result is returned by value: the accumulator is a local, so handing
// out a T& to it would leave the caller with a dangling reference.
// Not marked inline because main() calls it from a different translation
// unit than the one holding this definition.
template <typename T>
T Vector<T>::sum()
{
    T total{};  // value-initialized: zero for arithmetic T
    for (int i = 0; i < len; ++i) {
        total += arr[i];
    }
    return total;
}
template <typename T>
Vector<T>::Vector(const int n) : arr(new T[n]()), len(n)
{
//cout << "Vector(n)" <<'\n';
}
template <typename T>
Vector<T>::Vector(const int n, const T& val) : arr(new T[n]), len(n)
{
//cout << "Vector(n, val)" <<'\n';
for (int i = 0; i < len; ++i) {
arr[i] = val;
}
}
// Returns a mutable reference to the element at position i.
// No bounds check is performed on i.
template <typename T>
T& Vector<T>::operator[](const int i)
{
    return *(arr + i);
}
// Reports how many elements the vector holds (the n given at construction).
template <typename T>
int const Vector<T>::length()
{
    return this->len;
}
// Explicit instantiations: the member templates are defined in this .cpp
// file rather than in the header, so definitions are generated here for the
// element types listed below and other translation units link against them.
template class Vector<int>;
template class Vector<float>;
vector_test.cpp
#include "vector.h"
#include <iostream>
#include <chrono>
#include <vector>
using namespace std;
int main()
{
const int n = 2000000;
float sum = 0;
chrono::steady_clock::time_point start = chrono::steady_clock::now();
Vector<float> vec(n, 1);
sum = vec.sum();
chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
cout << "my vec sum = " << sum << '\n';
cout << "my vec impl took " << chrono::duration_cast<chrono::microseconds>(end - start).count()
<< "us.\n";
sum = 0;
start = chrono::steady_clock::now();
vector<float> vec2(n, 1);
for (int i = 0; i < n; ++i) {
sum += vec2[i];
}
end = std::chrono::steady_clock::now();
cout << "std::vec sum = " << sum << '\n';
cout << "stl::vec impl took " << chrono::duration_cast<chrono::microseconds>(end - start).count()
<< "us.\n";
}
输出:
my vec sum = 2e+06
my vec impl took 11040us.
std::vec sum = 2e+06
stl::vec impl took 8034us.
答案 0 :(得分:1)
这段代码写得相当朴素,因为每次迭代都要根据索引重新计算元素地址(并且只能寄希望于优化器把它优化掉):
// Index-based fill: each pass reaches the element through arr[i].
for (int i = 0; i < len; ++i) {
arr[i] = val;
}
这是一个更好的方法:
// Pointer-based fill: walk [arr, arr + len) with a moving pointer instead of an index.
T* ptr = arr;
// One past the last element; used only as the loop bound.
T* end = ptr + len;
// Store val at the current slot, then step to the next one.
while ( ptr < end ) *ptr++ = val;
然而,一个好的编译器确实会进行这种转换。
同样的思路也适用于 sum():
// Adds up all len elements by walking the buffer with a pointer and returns
// the total by value.
template <typename T> inline T Vector<T>::sum()
{
    T total = 0;
    T* const stop = arr + len;  // one past the last element
    for (T* cur = arr; cur < stop; ++cur) {
        total += *cur;
    }
    return total;
}