Question

我正在编写一个矩阵类，该类的所有计算都通过 CUDA 库在 GPU 上进行。下面给出该类的精简版，以展示我遇到的问题：当我在 main() 函数中实例化一个对象时，我不确定调用的是哪个构造函数。

我的目标是在使用此类时尽可能减少复制次数，因此我编写了一个移动（move）构造函数。但令我惊讶的是，这段代码既没有调用复制构造函数，也没有调用移动构造函数。而当我从类中删除移动构造函数后，复制构造函数就会被调用。到目前为止，我的结论是：编译器隐式定义了某个我不知道的函数。

#include <iostream>
#include <iomanip>
#include <cuda_runtime.h>

/**
 * Matrix whose element buffer is addressed through a raw pointer handed to the
 * CUDA runtime (cudaMalloc / cudaMemcpy / cudaFree in the member definitions).
 *
 * Ownership is tracked by the `del` flag: the destructor releases `p` only
 * when `del` is set. The two-argument and copy constructors clear the flag,
 * while the move constructor and get() set it.
 */
class cu_mat {
protected:
    size_t n_rows = 0;      // Number of rows.
    size_t n_cols = 0;      // Number of columns.
    bool del = true;        // When set, the destructor frees p.
    double *p = nullptr;    // Element buffer; null for an empty matrix.

    // Copy constructor.
    cu_mat(const cu_mat &to_b_copied);

    // Builds an r-by-c matrix with every element set to n.
    cu_mat(const size_t &r, const size_t &c, const double &n);

public:
    // Default constructor: empty matrix (0 x 0, null buffer).
    cu_mat() {}

    // Move constructor: takes over the buffer of to_be_moved.
    cu_mat(cu_mat&& to_be_moved);

    // Prints the matrix data to standard output.
    void get();

    // Factory helpers returning a matrix with all values 0.
    friend cu_mat zeros(const size_t &r, const size_t &c);
    friend cu_mat zeros(const size_t &n);

    virtual ~cu_mat();
};

/************************************   Copy constructor   ***********************************************/
/**
 * Copy constructor.
 *
 * The new object always starts with del == 0. What happens to the data depends
 * on the source's del flag:
 *   - source.del == 0: the new object aliases the source's device pointer
 *     (shallow copy, no allocation);
 *   - otherwise: a new device buffer is allocated and the elements are copied
 *     device-to-device.
 *
 * If the allocation or the copy fails, the error is reported on std::cerr and
 * the new object is left empty (0 x 0, null pointer) instead of carrying
 * dimensions that do not match a valid buffer.
 *
 * NOTE(review): because del is cleared, the buffer of a deep copy is only
 * released if get() or a move later sets del - confirm this is intended.
 */
cu_mat::cu_mat(const cu_mat &to_b_copied) : n_rows(to_b_copied.n_rows), n_cols(to_b_copied.n_cols), del(0)
{
    std::cout << "Copy constructor called." << std::endl;
    if ((n_rows == 0) || (n_cols == 0))
    {
        return; // Nothing to copy for an empty matrix.
    }

    if (to_b_copied.del == 0)
    {
        // Shallow copy: share the source's device buffer.
        p = to_b_copied.p;
        return;
    }

    const size_t n_bytes = n_rows*n_cols*sizeof(double);
    cudaError_t status = cudaMalloc((void**)&p, n_bytes); // Allocate memory on GPU.
    if (status != cudaSuccess)
    {
        p = NULL; // Do not trust the pointer after a failed allocation.
    }
    else
    {
        status = cudaMemcpy(p, to_b_copied.p, n_bytes, cudaMemcpyDeviceToDevice); // Copy array from GPU to GPU.
        if (status != cudaSuccess)
        {
            cudaFree(p);
            p = NULL;
        }
    }

    if (status != cudaSuccess)
    {
        std::cerr << "cu_mat copy constructor: " << cudaGetErrorString(status) << std::endl;
        n_rows = 0;
        n_cols = 0;
    }
}
/***********************************************************************************************************************/


/************************************   Two argument constructor with initialization   ***********************************************/
/**
 * Sets the first n_ele entries of p to the value n.
 *
 * Launch layout: 1-D grid of 1-D blocks. Each thread starts at its flat global
 * index and advances by the total number of launched threads (grid-stride
 * loop), so any grid/block size covers all n_ele entries. Indices are 64-bit
 * so that blockIdx.x * blockDim.x cannot wrap around.
 *
 * @param p      Device pointer to at least n_ele doubles.
 * @param n      Value written to every entry.
 * @param n_ele  Number of entries to write (passed as a double by the caller).
 */
__global__ void set_data(double* p, const double n, const double n_ele)
{
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for (size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         idx < n_ele;
         idx += stride)
    {
        p[idx] = n;
    }
}
/**
 * Builds an r-by-c matrix on the device with every element set to n.
 *
 * The object is created with del == 0. For a zero fill value the buffer is
 * cleared with cudaMemset; otherwise the set_data kernel writes the value.
 *
 * The kernel is launched with a fixed block size and a ceil-divided grid.
 * Launching one block of n_ele threads fails for any matrix with more
 * elements than the per-block thread limit, leaving the buffer unfilled.
 *
 * Any CUDA error (allocation, memset, or launch) is reported on std::cerr and
 * the object is left empty (0 x 0, null pointer).
 *
 * @param r  Number of rows.
 * @param c  Number of columns.
 * @param n  Fill value (defaults to 0).
 */
cu_mat::cu_mat(const size_t &r, const size_t &c, const double &n=0) : n_rows(r), n_cols(c), del(0)
{
    if ((n_rows == 0) || (n_cols == 0))
    {
        return; // Empty matrix: nothing to allocate.
    }

    const size_t n_ele = n_rows*n_cols;
    cudaError_t status = cudaMalloc((void**)&p, n_ele*sizeof(double));
    if (status != cudaSuccess)
    {
        p = NULL; // Do not trust the pointer after a failed allocation.
    }
    else
    {
        if (n != 0)
        {
            const unsigned int n_threads = 256;
            const unsigned int n_blocks = static_cast<unsigned int>((n_ele + n_threads - 1)/n_threads);
            set_data<<<n_blocks, n_threads>>>(p, n, n_ele);
            status = cudaGetLastError(); // Launch-configuration errors surface here.
        }
        else
        {
            // Byte-wise zeroing is a valid way to produce 0.0 doubles.
            status = cudaMemset(p, 0, n_ele*sizeof(double));
        }

        if (status != cudaSuccess)
        {
            cudaFree(p);
            p = NULL;
        }
    }

    if (status != cudaSuccess)
    {
        std::cerr << "cu_mat constructor: " << cudaGetErrorString(status) << std::endl;
        n_rows = 0;
        n_cols = 0;
    }
}
/***********************************************************************************************************************/


/************************************   Move constructor   ***********************************************/
/**
 * Move constructor: takes over the dimensions and device pointer of
 * to_b_moved and leaves the source as an empty matrix. Both objects end up
 * with del set.
 */
cu_mat::cu_mat(cu_mat&& to_b_moved)
{
    std::cout << "Move constructor called." << std::endl;

    // Adopt the source's shape.
    n_rows = to_b_moved.n_rows;
    n_cols = to_b_moved.n_cols;

    // Release whatever p currently refers to (null at this point, from the
    // default member initializer), then adopt the source's buffer.
    cudaFree(p);
    p = to_b_moved.p;
    del = true;

    // Reset the source so that its destructor has nothing left to free.
    to_b_moved.n_rows = 0;
    to_b_moved.n_cols = 0;
    to_b_moved.p = NULL;
    to_b_moved.del = true;
}
/***********************************************************************************************************************/


/*****************************************   Matrix with all values 0   *****************************************/
/**
 * Returns an r-by-c matrix with all values 0.
 *
 * The matrix is built in a named local through the two-argument form of the
 * protected constructor (fill value left at its default) and returned by value.
 */
cu_mat zeros(const size_t &r, const size_t &c)
{
    cu_mat result(r, c);
    return result;
}
/** Returns an n-by-n matrix with all values 0 by delegating to zeros(n, n). */
cu_mat zeros(const size_t &n)
{
    return zeros(n, n);
}
/***************************************************************************************************************************/


/************************************   Print matrix data   ***********************************************/
/**
 * Prints the matrix to std::cout, one row per line, in scientific notation
 * with 4 digits of precision.
 *
 * The device buffer is copied into a temporary host array and read as
 * column-major storage: element (i, j) lives at index j*n_rows + i.
 *
 * Side effect: sets del, so the destructor will free the device buffer.
 *
 * If the device-to-host copy fails, the error is reported on std::cerr and
 * nothing is printed, rather than printing the zero-initialised host buffer
 * as if it were matrix data. The formatting state of std::cout is restored
 * before returning.
 */
void cu_mat::get()
{
    del = 1;

    const size_t n_ele = n_rows*n_cols;
    double *m = new double[n_ele](); // Allocate space on CPU memory.

    if (n_ele > 0)
    {
        // Copy data from GPU to CPU.
        const cudaError_t status = cudaMemcpy(m, p, n_ele*sizeof(double), cudaMemcpyDeviceToHost);
        if (status != cudaSuccess)
        {
            std::cerr << "cu_mat::get: " << cudaGetErrorString(status) << std::endl;
            delete[] m;
            return;
        }
    }

    // Remember the caller's stream formatting so it can be put back afterwards.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const std::streamsize saved_precision = std::cout.precision();

    std::cout << std::scientific << std::setprecision(4);
    for (size_t i = 0; i < n_rows; ++i)
    {
        for (size_t j = 0; j < n_cols; ++j)
        {
            std::cout << m[j*n_rows+i] << " ";
        }
        std::cout << std::endl;
    }

    std::cout.flags(saved_flags);
    std::cout.precision(saved_precision);
    delete[] m;
}
/***********************************************************************************************************************/


/**
 * Destructor: frees the device buffer, but only for objects whose del flag is
 * set. Objects with del cleared leave p untouched.
 */
cu_mat::~cu_mat() {
    if (!del) {
        return; // This object is not flagged to release the buffer.
    }

    std::cout << "Destructor called." << p << std::endl;
    cudaFree(p);
    p = NULL;
}


// Demo driver: builds a 3x3 zero matrix and prints it.
int main()
{
    // Initialised from the value returned by zeros(3). The output recorded
    // with this program shows neither the copy nor the move constructor message.
    // NOTE(review): presumably the compiler elides the copy/move (return value
    // optimisation) and constructs `a` directly - confirm against the compiler
    // and language standard in use.
    cu_mat a = zeros(3);
    // get() prints the matrix and also sets del, which is what makes the
    // destructor free the device buffer when `a` goes out of scope.
    a.get();
    return 0;
}

我得到的结果如下：

0.0000e+00 0.0000e+00 0.0000e+00
0.0000e+00 0.0000e+00 0.0000e+00
0.0000e+00 0.0000e+00 0.0000e+00
Destructor called.0x501b60000

上面定义的类究竟调用了哪个构造函数？

0 个答案: