c ++数组与std :: vector和std :: array的效率

时间:2016-03-12 06:21:41

标签: c++ arrays vector

我比较了不同类型的1d阵列或2d阵列,如下所示。我发现使用new运算符效率更高,也许std :: arrary和std :: vector是一个对象,它们是通用且安全但更多时间?更何况,我不知道为什么调用新的外部函数比内部函数更有效?

#include <iostream>
#include <vector>
#include <array>
#include <ctime>


void test1 () {
int *arr = new int[10000];

for (int i=0; i<10000; ++i) {
    arr[i] = 3;
}

for (int i=0; i<10000; ++i) {
    int a = arr[i];
}
delete arr;
}

void test11 () {
int **arr = new int*[100];
for (int i=0; i<100; ++i) {
    arr[i] = new int[100];
}

for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j) {
        arr[i][j] = 3;
    }
}

for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j) {
        int a = arr[i][j];
    }
}

delete [] arr;
}


void test2() {
std::vector<int> arr(10000);

for (int i=0; i<10000; ++i) {
    arr[i] = 3;
}

for (int i=0; i<10000; ++i) {
     int a = arr[i];
}
}

void test22() {
std::vector<std::vector<int> > arr(100, std::vector<int>(100));

for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j) {
        arr[i][j] = 3;
    }
}

for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j) {
        int a = arr[i][j];
    }
}
}



void test3(int *arr, int n) {
for (int i=0; i<n; ++i) {
    arr[i] = 3;
}
for (int i=0; i<n; ++i) {
     int a = arr[i];
}
}

void test33(int **arr, int m, int n) {
for (int i=0; i<m; ++i) {
    for (int j=0; j<n; ++j) {
        arr[i][j] = 3;
    }
}

for (int i=0; i<m; ++i) {
    for (int j=0; j<n; ++j) {
        int a = arr[i][j];
    }
}

}

void test4() {
std::array<int, 10000> arr;
for (int i=0; i<10000; ++i) {
    arr[i] = 3;
}
for (int i=0; i<10000; ++i) {
     int a = arr[i];
}
}

void test44() {
std::array<std::array<int, 100>, 100> arr;
for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j) {
         arr[i][j] = 3;
    }
}
for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j)
     int a = arr[i][j];
}
}


int main() {
clock_t start, end;
start = clock();
for (int i=0; i<1000; ++i) {
    test1();
}
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

start = clock();
for (int i=0; i<1000; ++i) {
    test11();
}
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

start = clock();
for (int i=0; i<1000; ++i) {
    test2();
}
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

start = clock();
for (int i=0; i<1000; ++i) {
    test22();
}
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

start = clock();
for (int i=0; i<1000; ++i) {
    int *arr = new int[10000];
    test3(arr, 10000);
    delete arr;
}
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

start = clock();
int **arr = new int*[100];
for (int i=0; i<100; ++i) {
    arr[i] = new int[100];
}

for (int i=0; i<1000; ++i) {
    test33(arr, 100, 100);
}
delete [] arr;
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

start = clock();
for (int i=0; i<1000; ++i) {
    test4();
}
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

start = clock();
for (int i=0; i<1000; ++i) {
    test44();
}
end = clock();
std::cout << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

}

输出是:

90 ms
80 ms
70 ms
120 ms
50 ms
40 ms
100 ms
190 ms

感谢您的帮助,也许我没有正确描述我的问题,我编写了一个多次调用的函数,这个函数新建一个数组然后删除它:

void fun() {
   int *arr = new int[10000]; //maybe very big
   //todo something else
   delete arr; 
}

有人告诉我这不高效,因为它每次都是新的和删除,现在我有两个问题:

1。什么是内存管理的正确方法?

int *arr = new int[]; delete arr;
int **arr = new int*[]; delete [] arr;

错误?也许是这样的:

for (int i=0; i<n; ++i){
  delete [] arr;
}
delete arr;

2。我编写此功能的最佳方式是什么

3 个答案:

答案 0 :(得分:1)

我认为你的测试不对。也许你在调试模式下运行?我没有看到test11()可能比test1()更快(即使它没有释放所有内存)。

此外,在上述许多情况下,发布模式编译器会优化您的代码,因为它实际上没有做任何事情:

for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j) {
        int a = arr[i][j];
    }
}

任何编译器几乎肯定会消除它,因为没有使用'a',这意味着不会使用'i'和'j'。

for (int i=0; i<100; ++i) {
    for (int j=0; j<100; ++j) {
        arr[i][j] = 3;
    }
}

有些编译器甚至可以很好地消除代码,因为内存永远不会被再次读取,但很可能不会。

我建议不要担心vector与new int []的任何性能开销。在调试模式下,在释放模式下,您将获得免费的调试辅助(边界检查),只要您不调用超出范围的函数,代码在性能上将基本相同,以用于实际目的。另外,您不必担心内存管理(test1()和test11()都不太正确。)

答案 1 :(得分:1)

让我们做一些改进测试的工作,并通过正确使用它给标准库一个机会......

#include <iostream>
#include <vector>
#include <array>
#include <ctime>
#include <memory>
#include <algorithm>
#include <iterator>

void test1 () {
    auto arr = std::make_unique<int[]>(10000);
    std::fill(arr.get(), arr.get() + 10000, 3);

    for (int i=0; i<10000; ++i) {
        int a = arr[i];
    }
}

void test11 () {
    auto arr = std::make_unique<std::unique_ptr<int[]>[]>(100);
    for (auto i = 0 ; i < 100 ; ++i) {
        arr[i] = std::make_unique<int[]>(100);
    }

    for (int i=0; i<100; ++i) {
        for (int j=0; j<100; ++j) {
            arr[i][j] = 3;
        }
    }

    for (int i=0; i<100; ++i) {
        for (int j=0; j<100; ++j) {
            int a = arr[i][j];
        }
    }

}


void test2() {
    std::vector<int> arr(10000);
    std::fill(std::begin(arr), std::end(arr), 3);

    for (int i=0; i<10000; ++i) {
        int a = arr[i];
    }
}

void test22() {
    std::vector<std::vector<int> > arr(100, std::vector<int>(100));
    std::for_each(begin(arr),
                  end(arr),
                  [](auto& inner) {
                      std::fill(std::begin(inner), std::end(inner), 3);
                  });

    for (int i=0; i<100; ++i) {
        for (int j=0; j<100; ++j) {
            int a = arr[i][j];
        }
    }
}



void test3(int *arr, int n) {
    std::fill(arr, arr + n, 3);

    for (int i=0; i<n; ++i) {
        int a = arr[i];
    }
}

void test33(const std::unique_ptr<std::unique_ptr<int[]>[]>& arr, int m, int n) {
    for (int i=0; i<m; ++i) {
        for (int j=0; j<n; ++j) {
            arr[i][j] = 3;
        }
    }

    for (int i=0; i<m; ++i) {
        for (int j=0; j<n; ++j) {
            int a = arr[i][j];
        }
    }

}

void test4() {
    std::array<int, 10000> arr;
    std::fill(std::begin(arr), std::end(arr), 3);

    for (int i=0; i<10000; ++i) {
        int a = arr[i];
    }
}

void test44() {
    std::array<std::array<int, 100>, 100> arr;
    std::for_each(begin(arr),
                  end(arr),
                  [](auto& inner) {
                      std::fill(std::begin(inner), std::end(inner), 3);
                  });

    for (int i=0; i<100; ++i) {
        for (int j=0; j<100; ++j)
            int a = arr[i][j];
    }
}


int main() {
    clock_t start, end;
    start = clock();
    for (int i=0; i<1000; ++i) {
        test1();
    }
    end = clock();
    std::cout << "test 1 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

    start = clock();
    for (int i=0; i<1000; ++i) {
        test11();
    }
    end = clock();
    std::cout << "test 11 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

    start = clock();
    for (int i=0; i<1000; ++i) {
        test2();
    }
    end = clock();
    std::cout << "test 2 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

    start = clock();
    for (int i=0; i<1000; ++i) {
        test22();
    }
    end = clock();
    std::cout << "test 22 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

    start = clock();
    for (int i=0; i<1000; ++i) {
        int *arr = new int[10000];
        test3(arr, 10000);
        delete [] arr;
    }
    end = clock();
    std::cout << "test 3 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

    start = clock();
    auto arr = std::make_unique<std::unique_ptr<int[]>[]>(100);
    for (auto i = 0 ; i < 100 ; ++i) {
        arr[i] = std::make_unique<int[]>(100);
    }

    for (int i=0; i<1000; ++i) {
        test33(arr, 100, 100);
    }
    arr.reset();

    end = clock();
    std::cout << "test 33 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

    start = clock();
    for (int i=0; i<1000; ++i) {
        test4();
    }
    end = clock();
    std::cout << "test 4 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

    start = clock();
    for (int i=0; i<1000; ++i) {
        test44();
    }
    end = clock();
    std::cout << "test 44 " << (double)(end - start) * 1000.0 / CLOCKS_PER_SEC << " ms" << std::endl;

}
使用-O2编译时,

在我的计算机上产生结果:

test 1 0.002 ms
test 11 13.506 ms
test 2 2.753 ms
test 22 13.738 ms
test 3 1.42 ms
test 33 1.552 ms
test 4 0 ms
test 44 0 ms

我们还要注意,数组是“小”的,并且正在重复分配和释放。如果你可以重复使用缓冲区,那么时间上的差异就会完全消失。

另请注意:test33很快,因为它永远不会重新分配内存 - 您正在重新使用缓冲区。

答案 2 :(得分:0)

在普通C数组上运行时,编译器能够识别解开循环的机会,从而节省了少量的迭代开销。可能(不太可能)编译器不会优化STL容器的访问循环,因为它不知道它们是否修改任何成员变量。

同样我最好的猜测,为什么test11的性能优于test1,它是交织外循环的多次迭代(利用现代x86 / x64处理器超标量的事实),IE:

for(int j = 0; j < 100; ++j)
{
    for(int i = 0; i < 100; i+=4)
    {
        arr[i][j] = 3;
        arr[i+1][j] = 3;
        arr[i+2][j] = 3;
        arr[i+4][j] = 3;
    }
}

或完全可能是其他东西。编译器可以做一些非常复杂的循环转换来获得每一点点的性能。