Question

我有以下函数：

// Pushes inputValues through every layer in _layers.
//
// Layer 0 is loaded directly from inputValues via UpdateLayerOutputValues.
// For each later layer, the previous layer's data is flattened into host
// vectors by PrepareDeviceInputValues, copied to the device, processed by the
// GetLayerOutputValues kernel, copied back into hResultValues, and stored in
// the layer via UpdateLayerOutputValues.
//
// NOTE(review): none of the CUDA runtime calls or the kernel launch have their
// return status checked, so a failure in any of them goes unnoticed here.
void Feed(const vector<double> &inputValues)
{
    // Host-side staging buffers, refilled for every layer.
    vector<double> hOutputValues;
    vector<double> hOutputWeights;
    vector<double> hResultValues;

    // Device-side counterparts of the host buffers above.
    // NOTE(review): these are left uninitialized; if the loop below never runs
    // (fewer than two layers), the cudaFree calls at the end receive
    // indeterminate pointers.
    double *dOutputValues;
    double *dOutputWeights;
    double *dResultValues;

    UpdateLayerOutputValues(_layers[0], inputValues);

    for (unsigned layernum = 1; layernum < _layers.size(); layernum++)
    {
        Layer &prevlayer = _layers[layernum - 1];       

        PrepareDeviceInputValues(prevlayer, hOutputValues, hOutputWeights); 
        // The host result buffer holds _layers[layernum].size() - 1 doubles.
        hResultValues.resize(_layers[layernum].size() - 1);

        // NOTE(review): fresh device buffers are allocated on every iteration,
        // but cudaFree is only called once after the loop, so the buffers from
        // all earlier iterations are never released.
        cudaMalloc(&dOutputValues, hOutputValues.size() * sizeof(double));
        cudaMemcpy(dOutputValues, &hOutputValues[0], hOutputValues.size() * sizeof(double),     cudaMemcpyHostToDevice);

        cudaMalloc(&dOutputWeights, hOutputWeights.size() * sizeof(double));
        cudaMemcpy(dOutputWeights, &hOutputWeights[0], hOutputWeights.size() * sizeof(double), cudaMemcpyHostToDevice);

        // NOTE(review): the result buffer is sized by hOutputWeights.size(),
        // not by the number of results (_layers[layernum].size() - 1).
        cudaMalloc(&dResultValues, hOutputWeights.size() * sizeof(double));     

        // Launched as a single block of prevlayer.size() threads; the last
        // argument is the same element count hResultValues was resized to.
        GetLayerOutputValues << <1, prevlayer.size() >> >(dOutputValues, dOutputWeights, dResultValues, _layers[layernum].size() - 1);

        // NOTE(review): this copies hOutputWeights.size() doubles into
        // hResultValues, which only holds _layers[layernum].size() - 1
        // elements. Whenever hOutputWeights is the larger of the two, the copy
        // writes past the end of the vector's storage and corrupts the host
        // heap. The byte count should match the destination's size.
        cudaMemcpy(&hResultValues[0], dResultValues, hOutputWeights.size() * sizeof(double), cudaMemcpyDeviceToHost);       

        UpdateLayerOutputValues(_layers[layernum], hResultValues);

        hResultValues.clear();      
    }

    hOutputValues.clear();
    hOutputWeights.clear();

    // Releases only the buffers allocated by the final loop iteration.
    cudaFree(dOutputValues);
    cudaFree(dOutputWeights);
    cudaFree(dResultValues);
}

函数本身运行正常，但在函数退出之后问题就出现了：我在 hResultValues 向量释放内存期间遇到异常。如果我注释掉下面这一行，问题就不存在：

cudaMemcpy(&hResultValues[0], dResultValues, hOutputWeights.size() * sizeof(double), cudaMemcpyDeviceToHost);

起初我认为也许向量有问题，我试图用数组替换hResultValues但我仍然得到一个例外。

我没有收到任何异常消息，但我发现该异常是由 dbgheap.c 中的 int _CrtIsValidHeapPointer() 函数抛出的。

任何人都可以向我解释我的问题的原因是什么？

Answer 1

感谢 @PaulMcKenzie，我发现了我的代码出了什么问题。基本上，hResultValues 的大小是 _layers[layernum].size() - 1，但下面这两行：

cudaMalloc(&dResultValues, hOutputWeights.size() * sizeof(double));
cudaMemcpy(&hResultValues[0], dResultValues, hOutputWeights.size() * sizeof(double), cudaMemcpyDeviceToHost);

使用的大小不正确。该错误是由我对代码的修改引起的。之前，正如 Paul 所发现的，hResultValues 的大小与 hOutputWeights 相同；但后来我决定在 GPU 上进行更多的计算，因此最终的 hResultValues 比 hOutputWeights 小。当我进行必要的修改时，我正确地更改了 hResultValues 的大小，但忘了更改已分配内存的大小。

正确解决此问题的方法是：

cudaMalloc(&dResultValues, (_layers[layernum].size() - 1) * sizeof(double));
cudaMemcpy(&hResultValues[0], dResultValues, (_layers[layernum].size() - 1) * sizeof(double), cudaMemcpyDeviceToHost);

使用CudaMemcpy后向量释放期间的异常

1 个答案: