cudaGetDeviceProperties是否返回损坏的信息?

时间:2012-07-25 16:36:22

标签: cuda

我的程序使用了CUDA Radix排序类。从CUDA 4.0升级到4.2之后,该类的辅助初始化函数崩溃,并显示消息“堆栈变量'devprop'已损坏”。我通过注释掉部分功能代码定位了问题,发现是cudaGetDeviceProperties破坏了devprop。我不知道为什么会这样,也不知道该如何解决。我的环境是CUDA 4.2、开发驱动程序301.32、Nsight 2.2、Windows 7 64位,以Win32为目标平台编译。以下代码段包含发生崩溃的辅助函数initDeviceParameters():

// Headers are included at global scope, before the namespace is opened.
// Including them inside `namespace nvRadixSort { ... }` would place every
// declaration they contain (C library, CUDA runtime, CUDPP) inside that
// namespace; standard headers must be included outside any declaration.
#include "radixsort.h"
#include "cudpp/cudpp.h"
#include <stdio.h>
#include <assert.h>

namespace nvRadixSort
{

// Set by initDeviceParameters(): true when the current device's compute
// capability is below 1.2 (manual coalescing help needed in reorderData).
bool bManualCoalesce = false;

// Set by initDeviceParameters(): true when the current device's compute
// capability is below 2.0.
bool bUsePersistentCTAs = false;

/**
 * Configure the module-level tuning flags from the properties of the
 * currently selected CUDA device.
 *
 * If either the current device or its properties cannot be queried, the
 * function returns without modifying bManualCoalesce / bUsePersistentCTAs.
 *
 * @param keysOnly  Not referenced by the code visible here; the elided
 *                  persistent-CTA branch below may depend on it.
 */
void initDeviceParameters(bool keysOnly)
{
    int deviceID = -1;
    if (cudaSuccess != cudaGetDevice(&deviceID))
    {
        return;
    }

    cudaDeviceProp devprop;
    if (cudaSuccess != cudaGetDeviceProperties(&devprop, deviceID))
    {
        // devprop is an uninitialized local and may not have been filled in
        // when the query fails, so it must not be read.
        return;
    }

    // Encode compute capability major.minor as a two-digit number (2.1 -> 21).
    const int smVersion = devprop.major * 10 + devprop.minor;

    // sm_12 and later devices don't need help with coalesce in reorderData kernel
    bManualCoalesce    = (smVersion < 12);
    bUsePersistentCTAs = (smVersion < 20);

    if (bUsePersistentCTAs)
    {
        //Irrelevant. My setup is 2.1
    }
}

} // namespace nvRadixSort

这是相关的类代码:

#include <cuda_runtime_api.h>
#include "cudpp/cudpp.h"

namespace nvRadixSort
{

class RadixSort
{
    public:

    RadixSort(unsigned int maxElements, bool keysOnly = false)
        : mScanPlan(0),
          mNumElements(0),
          mTempKeys(0),
          mTempValues(0),
          mCounters(0),
          mCountersSum(0),
          mBlockOffsets(0)
    {
        // Allocate temporary storage
        initialize(maxElements, keysOnly);
    }
    protected: // data

    CUDPPHandle   mCudppContext;
    CUDPPHandle   mScanPlan;        // CUDPP plan handle for prefix sum

    unsigned int  mNumElements;     // Number of elements of temp storage allocated
    unsigned int *mTempKeys;        // Intermediate storage for keys
    unsigned int *mTempValues;      // Intermediate storage for values
    unsigned int *mCounters;        // Counter for each radix
    unsigned int *mCountersSum;     // Prefix sum of radix counters
    unsigned int *mBlockOffsets;    // Global offsets of each radix in each block

    protected: // methods

    void initialize(unsigned int numElements, bool keysOnly)
    {
        // initialize parameters based on present CUDA device
        initDeviceParameters(keysOnly);

        // Allocate temporary storage
        mNumElements = numElements;

        unsigned int numBlocks = ((numElements % (CTA_SIZE * 4)) == 0) ?
                                 (numElements / (CTA_SIZE * 4)) : (numElements / (CTA_SIZE * 4) + 1);
        unsigned int numBlocks2 = ((numElements % (CTA_SIZE * 2)) == 0) ?
                                  (numElements / (CTA_SIZE * 2)) : (numElements / (CTA_SIZE * 2) + 1);

        // Initialize scan
        cudppCreate(&mCudppContext);

        CUDPPConfiguration scanConfig;
        scanConfig.algorithm = CUDPP_SCAN;
        scanConfig.datatype  = CUDPP_UINT;
        scanConfig.op        = CUDPP_ADD;
        scanConfig.options   = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_FORWARD;
        cudppPlan(mCudppContext , &mScanPlan, scanConfig, 16 * numBlocks2, 1, 0);

        cudaMalloc((void **)&mTempKeys,     numElements * sizeof(unsigned int));
        if(!keysOnly)
            cudaMalloc((void **)&mTempValues,   numElements * sizeof(unsigned int));
        cudaMalloc((void **)&mCounters,     WARP_SIZE_ * numBlocks * sizeof(unsigned int));
        cudaMalloc((void **)&mCountersSum,  WARP_SIZE_ * numBlocks * sizeof(unsigned int));
        cudaMalloc((void **)&mBlockOffsets, WARP_SIZE_ * numBlocks * sizeof(unsigned int));

        checkCudaError("RadixSort::initialize()");
    }
}

1 个答案:

答案 0 :(得分:1)

问题已解决。虽然已经安装了CUDA 4.2,但我的Visual Studio项目仍在使用CUDA 4.0的构建规则和工具。把项目文件改为使用新版(4.2)的构建规则和工具之后,问题就消失了。