为什么vector在这段代码中使用的内存少于指针?

时间:2015-05-28 16:37:48

标签: c++ pointers vector strassen

我使用指针编写了基于Strassen乘法算法的并行程序。 该程序返回两个大小相同的矩阵相乘的结果。 当大小为256时,程序填充大约1 GB的ram,当它为512 ram时,\ y变满,我的窗口不起作用,那么我必须重新启动。

我用向量替换整个指针然后令人难以置信的Ram使用率降低了!。对于1024大小,只使用了80 MB的ram。

我对一开始静态绑定的向量知之甚少,如果我们在运行时期间需要更多空间动态绑定。

为什么指针需要比矢量更多的空间?

这是我的第一个代码:

#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>

#include <Windows.h>
#include <Psapi.h>
#include<vector>


using namespace std;

int ** matrix_1;
int ** matrix_2;

#define number_thread:4;

void show(string name, int n, int **show)
{
    cout << " matrix " << name << " :" << endl;
    for (int i = 0; i < n; i++)
    {

        for (int j = 0; j < n; j++)
            cout << show[i][j] << " ";
        cout << endl;
    }
}



int ** strassen(int n, int **matrix_a, int ** matrix_b)
{

    int ** A11;
    int ** A12;
    int ** A21;
    int ** A22;

    int ** B11;
    int ** B12;
    int ** B21;
    int ** B22;

    int ** result;


    int **m1, **m2, **m3, ** m4, ** m5, ** m6, ** m7, ** m8;
    A11 = new int*[n / 2];
    A12 = new int*[n / 2];
    A21 = new int*[n / 2];
    A22 = new int*[n / 2];

    B11 = new int*[n / 2];
    B12 = new int*[n / 2];
    B21 = new int*[n / 2];
    B22 = new int*[n / 2];


    result = new int *[n];

    m1 = new int*[n / 2];
    m2 = new int*[n / 2];
    m3 = new int*[n / 2];
    m4 = new int*[n / 2];
    m5 = new int*[n / 2];
    m6 = new int*[n / 2];
    m7 = new int*[n / 2];
    m8 = new int*[n / 2];

    cilk_for(int i = 0; i < n / 2; i++)
    {
        //cout << " value i : " << i << endl;
        A11[i] = new int[n / 2];
        A12[i] = new int[n / 2];
        A21[i] = new int[n / 2];
        A22[i] = new int[n / 2];

        B11[i] = new int[n / 2];
        B12[i] = new int[n / 2];
        B21[i] = new int[n / 2];
        B22[i] = new int[n / 2];

        m1[i] = new int[n / 2];
        m2[i] = new int[n / 2];
        m3[i] = new int[n / 2];
        m4[i] = new int[n / 2];
        m5[i] = new int[n / 2];
        m6[i] = new int[n / 2];
        m7[i] = new int[n / 2];
        m8[i] = new int[n / 2];

    }

    cilk_for(int i = 0; i < n; i++) // matrix result
        result[i] = new int[n];


    if (n == 2)
    {
        result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
        result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
        result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
        result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];

        return result;

    }
    //  for (int i = 0; i < n;i++)

    cilk_for(int i = 0; i < (n / 2); i++)
    {
        for (int j = 0; j < (n / 2); j++)
        {
            A11[i][j] = matrix_a[i][j];
            B11[i][j] = matrix_b[i][j];

            A12[i][j] = matrix_a[i][j + n / 2];
            B12[i][j] = matrix_b[i][j + n / 2];

            A21[i][j] = matrix_a[i + n / 2][j];
            B21[i][j] = matrix_b[i + n / 2][j];

            A22[i][j] = matrix_a[i + n / 2][j + n / 2];
            B22[i][j] = matrix_b[i + n / 2][j + n / 2];


        }
    }
    /*
    show("A11", n / 2, A11);
    show("A12", n / 2, A12);
    show("A21", n / 2, A21);
    show("A22", n / 2, A22);
    show("B11", n / 2, B11);
    show("B12", n / 2, B12);
    show("B21", n / 2, B21);
    show("B22", n / 2, B22);*/

    // Run By eight_thread
    m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
    m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
    m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
    m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
    m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
    m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
    m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
    m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22



    cilk_sync;

    /*
    cout << "****************************\n";
    cout << "*********** before add :\n";
    show("m1", n / 2, m1);
    show("m2", n / 2, m2);
    show("m3", n / 2, m3);
    show("m4", n / 2, m4);
    show("m5", n / 2, m5);
    show("m6", n / 2, m6);
    show("m7", n / 2, m7);
    show("m8", n / 2, m8);*/


    cilk_for(int i = 0; i < n / 2; i++)
    for (int j = 0; j < n / 2; j++)
    {
        m1[i][j] = m1[i][j] + m2[i][j];
        m3[i][j] = m3[i][j] + m4[i][j];
        m5[i][j] = m5[i][j] + m6[i][j];
        m7[i][j] = m7[i][j] + m8[i][j];

    }

    /*cout << "after adding hello \n";
    show("m1", n / 2, m1);
    show("m3", n / 2, m3);
    show("m5", n / 2, m5);
    show("m7", n / 2, m7);*/



    cilk_for(int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            if (i < n / 2 && j < n / 2)
            {
                result[i][j] = m1[i][j];
            }
            else if (i < n / 2 && j >= n / 2)
            {
                result[i][j] = m3[i][j - n / 2];
            }
            else if (i >= n / 2 && j < n / 2)
            {
                result[i][j] = m5[i - n / 2][j];
            }
            else if (i >= n / 2 && j >= n / 2)
            {
                result[i][j] = m7[i - n / 2][j - n / 2];

            }
        }
    }

    /*
    cilk_for(int i = 0; i < n / 2; i++)
    {
    for (int j = 0; j < n / 2; j++)
    {
    delete A11[i][j];
    delete A12[i][j];
    delete A21[i][j];
    delete A22[i][j];
    delete B11[i][j];
    delete B12[i][j];
    delete B21[i][j];
    delete B22[i][j];


    delete m1[i][j];
    delete m2[i][j];
    delete m3[i][j];
    delete m4[i][j];
    delete m5[i][j];
    delete m6[i][j];
    delete m7[i][j];
    delete m8[i][j];*/






    /*  }
        delete[] A11[i];
        delete[] A12[i];
        delete[] A21[i];
        delete[] A22[i];
        delete[] B11[i];
        delete[] B12[i];
        delete[] B21[i];
        delete[] B22[i];


        delete[] m1[i];
        delete[] m2[i];
        delete[] m3[i];
        delete[] m4[i];
        delete[] m5[i];
        delete[] m6[i];
        delete[] m7[i];
        delete[] m8[i];
        }*/


    delete[] A11;
    delete[] A12;
    delete[] A21;
    delete[] A22;
    delete[] B11;
    delete[] B12;
    delete[] B21;
    delete[] B22;


    delete[] m1;
    delete[] m2;
    delete[] m3;
    delete[] m4;
    delete[] m5;
    delete[] m6;
    delete[] m7;
    delete[] m8;

    return result;
}



int main()
{

    int size;

    freopen("in.txt", "r", stdin);
    freopen("out.txt", "w", stdout);


    __cilkrts_set_param("nworkers", "4");
    //cout << " please Enter the size OF ur matrix /n";
    cin >> size;

    matrix_1 = new int*[size];
    matrix_2 = new int*[size];

    if (size % 2 == 0)
    {

        //instialize matrix1
        //cout << "matrix_1 :" << endl;
        for (int i = 0; i < size; i++)
        {
            matrix_1[i] = new int[size];
            for (int j = 0; j < size; j++)

            {
                matrix_1[i][j] = rand() % 3;
                //cin >> matrix_1[i][j];
                //cout << matrix_1[i][j] << " ";

            }
            //cout << endl;

        }
        //instialize matrix2
        //cout << "matrix2_is :\n";
        for (int i = 0; i < size; i++)
        {
            matrix_2[i] = new int[size];
            for (int j = 0; j < size; j++)

            {

                matrix_2[i][j] = rand() % 3;
                //cout << matrix_2[i][j]<<" ";
                //cin >> matrix_2[i][j];

            }
            //  cout << endl;

        }
        clock_t begin = clock();


        matrix_2 = strassen(size, matrix_1, matrix_2);

        clock_t end = clock();
        double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;

        cout << "*******\ntime is : " << elapsed_secs << endl;

        //answer:
        /*  for (int i = 0; i < size; i++)
            {
            for (int j = 0; j < size; j++)

            {
            cout<< matrix_2[i][j]<<" ";

            }
            cout << endl;

            }*/


    }
    else
        cout << " we couldnt use strasen ";

    cout << "\nTotal Virtual Memory:" << endl;

    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
    printf("%u", totalVirtualMem);

    cout << "\nVirtual Memory currently used:" << endl;
    //  MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
    printf("%u", virtualMemUsed);


    cout << "\nVirtual Memory currently used by current process:" << endl;

    PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
    printf("%u", virtualMemUsedByMe);

    cout << "\nPhysical Memory currently used: " << endl;
    //MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;

    printf("%u", physMemUsed);

    cout << endl;
    cout << "\nPhysical Memory currently used by current process : " << endl;
    //  PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
    printf("%u", physMemUsedByMe);
    //cout << "memory usage :"<<double(totalVirtualMem) << endl;


    //_getch();

    return 0;

}

我用向量替换整个指针数组:

#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>

#include <Windows.h>
#include <Psapi.h>
#include<vector>


using namespace std;
vector<vector<int> > matrix_1, matrix_2;

//int matrix_1;
//int ** matrix_2;

#define number_thread:4;

void show(string name ,int n, int **show)
{
    cout << " matrix " << name<<" :" << endl;
    for (int i = 0; i < n; i++)
    {

        for (int j = 0; j < n; j++)
            cout << show[i][j] << " ";
        cout << endl;
    }
}


vector<vector<int>> strassen(int n, vector<vector<int>> matrix_a, vector<vector<int>> matrix_b)
{

    vector<vector<int>> A11;
    vector<vector<int>> A12;
    vector<vector<int>> A21;
    vector<vector<int>> A22;

    vector<vector<int>> B11;
    vector<vector<int>> B12;
    vector<vector<int>> B21;
    vector<vector<int>> B22;

    vector<vector<int>> result;


    vector<int> help;


    vector<vector<int>> m1, m2, m3,  m4, m5,  m6,  m7,  m8;




    help.clear();
    for (int j = 0; j < n / 2; j++)
    {
        help.push_back(2);
    }


    for(int i = 0; i < n / 2; i++)
    {
        A11.push_back(help);
        A12.push_back(help);
        A21.push_back(help);
        A22.push_back(help);

        B11.push_back(help);
        B12.push_back(help);
        B21.push_back(help);
        B22.push_back(help);


        m1.push_back(help);
        m2.push_back(help);
        m3.push_back(help);
        m4.push_back(help);

        m5.push_back(help);
        m6.push_back(help);
        m7.push_back(help);
        m8.push_back(help);
    }


    for (int j = 0; j < n / 2; j++)
        help.push_back(2);
    for(int i = 0; i < n; i++)
    {
        result.push_back(help);

    }
    if (n == 2)
    {
        result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
        result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
        result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
        result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];

        return result;

    }
    //  for (int i = 0; i < n;i++)

    for(int i = 0; i < (n / 2); i++)
    {
        for(int j = 0; j <( n / 2); j++)
        {
            A11[i][j] = matrix_a[i][j];
            B11[i][j] = matrix_b[i][j];

            A12[i][j] = matrix_a[i][j + n / 2];
            B12[i][j] = matrix_b[i][j + n / 2];

            A21[i][j] = matrix_a[i + n / 2][j];
            B21[i][j] = matrix_b[i + n / 2][j];

            A22[i][j] = matrix_a[i + n / 2][j + n / 2];
            B22[i][j] = matrix_b[i + n / 2][j + n / 2];


        }
    }
    /*
    show("A11", n / 2, A11);
    show("A12", n / 2, A12);
    show("A21", n / 2, A21);
    show("A22", n / 2, A22);
    show("B11", n / 2, B11);
    show("B12", n / 2, B12);
    show("B21", n / 2, B21);
    show("B22", n / 2, B22);*/

    // Run By eight_thread
    m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
    m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
    m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
    m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
    m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
    m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
    m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
    m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22



    cilk_sync;

    /*
    cout << "****************************\n";
    cout << "*********** before add :\n";
    show("m1", n / 2, m1);
    show("m2", n / 2, m2);
    show
("m3", n / 2, m3);
    show("m4", n / 2, m4);
    show("m5", n / 2, m5);
    show("m6", n / 2, m6);
    show("m7", n / 2, m7);
    show("m8", n / 2, m8);*/


    for(int i = 0; i < n / 2; i++)
    for (int j = 0; j < n / 2; j++)
    {
        m1[i][j] = m1[i][j] + m2[i][j];
        m3[i][j] = m3[i][j] + m4[i][j];
        m5[i][j] = m5[i][j] + m6[i][j];
        m7[i][j] = m7[i][j] + m8[i][j];

    }

        /*cout << "after adding hello \n";
        show("m1", n / 2, m1);
        show("m3", n / 2, m3);
        show("m5", n / 2, m5);
        show("m7", n / 2, m7);*/



    for(int i = 0; i < n ; i++)
    {
        for(int j = 0; j < n ; j++)
        {
            if (i < n / 2 && j < n / 2)
            {
                result[i][j] = m1[i][j];
            }
            else if (i < n / 2 && j >= n / 2)
            {
                result[i][j] = m3[i][j - n / 2];
            }
            else if (i >= n / 2 && j < n / 2)
            {
                result[i][j] = m5[i - n / 2][j];
            }
            else if (i >= n / 2 && j >= n / 2)
            {
                result[i][j] = m7[i - n / 2][j - n / 2];

            }
        }
    }



    /*
    cilk_for(int i = 0; i < n / 2; i++)
    {
        for (int j = 0; j < n / 2; j++)
        {
            delete A11[i][j];
            delete A12[i][j];
            delete A21[i][j];
            delete A22[i][j];
            delete B11[i][j];
            delete B12[i][j];
            delete B21[i][j];
            delete B22[i][j];


            delete m1[i][j];
            delete m2[i][j];
            delete m3[i][j];
            delete m4[i][j];
            delete m5[i][j];
            delete m6[i][j];
            delete m7[i][j];
            delete m8[i][j];*/






    /*  }
        delete[] A11[i];
        delete[] A12[i];
        delete[] A21[i];
        delete[] A22[i];
        delete[] B11[i];
        delete[] B12[i];
        delete[] B21[i];
        delete[] B22[i];


        delete[] m1[i];
        delete[] m2[i];
        delete[] m3[i];
        delete[] m4[i];
        delete[] m5[i];
        delete[] m6[i];
        delete[] m7[i];
        delete[] m8[i];
    }*/


/*  for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)

        {
            cout << result[i][j] << " ";

        }
        cout << endl;

    }*/

    return result;
}



int main()
{

    int size;

    freopen("in.txt","r",stdin);
    freopen("out.txt", "w", stdout);


    __cilkrts_set_param("nworkers", "1");
    //cout << " please Enter the size OF ur matrix /n";
    cin >> size;

    vector<int> inner;
    if (size % 2 == 0)
    {

        //instialize matrix1
        cout << "matrix_1 :" << endl;
        for (int i = 0; i < size; i++)
        {
            inner.clear();

            for (int j = 0; j < size; j++)

            {
                inner.push_back(rand()%3);
                //cin >> matrix_1[i][j];
                cout << inner[j]<<" ";

            }
            cout << endl;

            matrix_1.push_back(inner);
        }
        //instialize matrix2
        cout << "matrix2_is :\n";
        inner.clear();
        for (int i = 0; i < size; i++)
        {
            inner.clear();
            //matrix_2[i] = new int[size];
            for (int j = 0; j < size; j++)

            {

            inner.push_back(rand()%3);
            cout << inner[j]<<" ";
                //cin >> matrix_2[i][j];

            }
            cout << endl;
            matrix_2.push_back(inner);
        }
        clock_t begin = clock();


        matrix_2 = strassen(size, matrix_1, matrix_2);

        clock_t end = clock();
        double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;

        cout << "*******\ntime is : " << elapsed_secs << endl;

        //answer:
        cout << "answerrr :" << endl;
        for (int i = 0; i < size; i++)
        {
            for (int j = 0; j < size; j++)

            {
                cout<< matrix_2[i][j]<<" ";

            }
            cout << endl;

        }


    }

    else
    cout << " we couldnt use strasen ";

    cout << "\nTotal Virtual Memory:" << endl;

    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
    printf("%u", totalVirtualMem);

    cout << "\nVirtual Memory currently used:" << endl;
//  MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
    printf("%u", virtualMemUsed);


    cout << "\nVirtual Memory currently used by current process:" << endl;

    PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
    printf("%u", virtualMemUsedByMe);

    cout << "\nPhysical Memory currently used: " << endl;
    //MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;

    printf("%u", physMemUsed);

    cout << endl;
    cout << "\nPhysical Memory currently used by current process : " << endl;
//  PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
    printf("%u", physMemUsedByMe);
    //cout << "memory usage :"<<double(totalVirtualMem) << endl;


    //_getch();

    return 0;

}

2 个答案:

答案 0 :(得分:1)

有两个可能的原因:

  • 如果手动分配内存并且没有正确释放内存,则会造成内存泄漏。使用原始指针,这比使用向量更容易发生。
  • 如果在1000个单独的分配中分配1000个整数,则比分配1000个整数的单个块(向量的作用)所需的空间要多得多。每次分配都需要一些额外的记忆来记账。

答案 1 :(得分:0)

我猜这是一个分配问题。 OS的分配似乎比我看到的要耗费时间。

只是一个猜测,但是std::vector默认分配器可能正在从OS抓取一个更大的连续内存块,并从中抽取来满足较小的向量分配?

这个答案可能提供一些见解:

https://stackoverflow.com/a/29659791/3807729

我设法通过分配,然后在运行计时操作之前释放大std::vector来减少运行测试程序所花费的时间。

我推测C++运行时系统(在某些实现中)可能会保留它从OS收到的内存,即使它已被解除分配,因为从{{1获取小块每次都要贵得多。