Alea GPU:CUDA_ERROR_LAUNCH_FAILED

时间:2018-10-17 23:41:43

标签: c# aleagpu

我在 C# 应用程序中使用 Alea GPU 时遇到了问题:调用 GPU 时会抛出异常。下面是我的代码和异常的堆栈跟踪。第一次运行这段代码时一切正常,但第二次运行同样的代码时就会抛出异常。有谁能帮我找出问题所在并告诉我如何解决吗?谢谢!

我的代码:

/// <summary>
/// Computes, on the GPU, one DTW distance per subsequence of <paramref name="series"/>
/// that has the same length as <paramref name="query"/>, then hands the results to the
/// CPU-side post-processing loop.
/// </summary>
/// <param name="series">The time series to scan; must be at least as long as <paramref name="query"/>.</param>
/// <param name="query">The query sequence compared against every subsequence.</param>
/// <param name="range">Warping-window half width forwarded unchanged to the kernel.</param>
/// <exception cref="ArgumentNullException"><paramref name="series"/> or <paramref name="query"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="query"/> is longer than <paramref name="series"/>.</exception>
/// <exception cref="OverflowException">The scratch-buffer size does not fit in an <c>int</c>.</exception>
public void Execute(double[] series, double[] query, int range)
{
    if (series == null)
    {
        throw new ArgumentNullException(nameof(series));
    }

    if (query == null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    // One GPU thread per candidate subsequence start position.
    int threadSize = series.Length - query.Length + 1;
    if (threadSize <= 0)
    {
        throw new ArgumentException("query must not be longer than series.", nameof(query));
    }

    var gpu = Gpu.Default;
    var blockDim = GPU_BLOCK_SIZE;
    var gridDim = (int)(threadSize + blockDim - 1) / blockDim;

    // Dynamic shared memory holds one copy of the query per block.
    var launchParam = new LaunchParam(gridDim, blockDim, query.Length * sizeof(double));

    // Each thread needs two rows of (query.Length + 1) doubles. 'checked' turns a silent
    // int overflow (which would allocate a too-small buffer and let the kernel write out
    // of bounds) into an OverflowException on the host.
    int flatMatrixLength = checked(threadSize * (query.Length + 1) * 2);

    double[] dSeries = null;
    double[] dQuery = null;
    double[] dDistances = null;
    double[] dFlatMatrix = null;
    double[] distances;

    try
    {
        dSeries = gpu.Allocate(series);
        dQuery = gpu.Allocate(query);
        dDistances = gpu.Allocate<double>(threadSize);
        dFlatMatrix = gpu.Allocate<double>(flatMatrixLength);

        //Calculate metric DTW
        gpu.Launch(Calculate, launchParam, dSeries, dQuery, range, dDistances, dFlatMatrix);

        // NOTE(review): the reported CUDA_ERROR_LAUNCH_FAILED surfaced at this copy rather
        // than at Launch - presumably the launch is asynchronous; confirm against Alea docs.
        distances = Gpu.CopyToHost(dDistances);
    }
    finally
    {
        // Release device buffers even when the launch or the copy throws, so a failed
        // call does not leave device memory allocated for the next call.
        if (dDistances != null)
        {
            Gpu.Free(dDistances);
        }

        if (dFlatMatrix != null)
        {
            Gpu.Free(dFlatMatrix);
        }

        if (dQuery != null)
        {
            Gpu.Free(dQuery);
        }

        if (dSeries != null)
        {
            Gpu.Free(dSeries);
        }
    }

    for (int i = 0; i < distances.Length; i++)
    {
        //Do something here on CPU
    }
}

/// <summary>
/// GPU kernel: thread <c>idx</c> computes the DTW distance between <paramref name="query"/>
/// and the subsequence of <paramref name="series"/> starting at <c>idx</c>, and writes its
/// square root to <c>distance[idx]</c>.
/// </summary>
/// <param name="series">The full time series.</param>
/// <param name="query">The query sequence; copied into block-shared memory before use.</param>
/// <param name="range">Only cells whose column lies within <c>range</c> of the current row are evaluated.</param>
/// <param name="distance">Output array, one entry per subsequence start position.</param>
/// <param name="flatMatrix">
/// Scratch storage: two rows of <c>query.Length + 1</c> doubles per thread, addressed through
/// <c>SetFlatMatrixValue</c> / <c>GetFlatMatrixValue</c>.
/// </param>
private static void Calculate(double[] series, double[] query, int range, double[] distance, double[] flatMatrix)
{
    var idx = blockIdx.x * blockDim.x + threadIdx.x;
    var windowSize = query.Length + 1;

    // Every thread of the block copies the whole query into shared memory; all threads
    // write the same values, so the result is the same regardless of which one wins.
    var sharedQuery = __shared__.ExternArray<double>();
    for (int i = 0; i < windowSize - 1; i++)
    {
        sharedQuery[i] = query[i];
    }

    // The barrier sits outside the bounds check below so that every thread reaches it.
    DeviceFunction.SyncThreads();

    if (idx < series.Length - query.Length + 1)
    {
        // k = row being written, l = previously completed row, g = swap temporary.
        int k, l, g;

        // Row 0 of the cost matrix: cost 0 at the origin, infinity elsewhere.
        k = 0; l = 1;
        SetFlatMatrixValue(flatMatrix, idx, windowSize, k, 0, 0);
        for (int m = 1; m < windowSize; m++)
        {
            SetFlatMatrixValue(flatMatrix, idx, windowSize, k, m, Double.PositiveInfinity);
        }

        k = 1; l = 0;
        for (int j = 1; j < windowSize; j++)
        {
            // Reset the row being written; cells outside the window stay at infinity.
            for (int m = 0; m < windowSize; m++)
            {
                SetFlatMatrixValue(flatMatrix, idx, windowSize, k, m, Double.PositiveInfinity);
            }

            var lower = DeviceFunction.Max(j - range, 1);
            var upper = DeviceFunction.Min(j + range + 1, windowSize);
            for (int i = lower; i < upper; i++)
            {
                double prevDist1 = GetFlatMatrixValue(flatMatrix, idx, windowSize, l, i - 1);
                double prevDist2 = GetFlatMatrixValue(flatMatrix, idx, windowSize, l, i);
                double prevDist3 = GetFlatMatrixValue(flatMatrix, idx, windowSize, k, i - 1);
                double dist = (series[idx + j - 1] - sharedQuery[i - 1]) * (series[idx + j - 1] - sharedQuery[i - 1]);
                double prevDist = DeviceFunction.Min(DeviceFunction.Min(prevDist1, prevDist2), prevDist3);
                SetFlatMatrixValue(flatMatrix, idx, windowSize, k, i, dist + prevDist);
            }

            // Swap the roles of the two rows for the next iteration.
            g = k; k = l; l = g;
        }

        // After the final swap, row l holds the last completed row.
        distance[idx] = DeviceFunction.Sqrt(GetFlatMatrixValue(flatMatrix, idx, windowSize, l, windowSize - 1));
    }
}

/// <summary>
/// Writes <paramref name="value"/> into the flattened two-rows-per-slot matrix.
/// </summary>
/// <param name="array">Flat backing storage.</param>
/// <param name="arrayIdx">Slot index; each slot owns two consecutive rows.</param>
/// <param name="width">Number of columns per row.</param>
/// <param name="row">Row within the slot (the callers in this file pass 0 or 1).</param>
/// <param name="col">Column within the row.</param>
/// <param name="value">Value to store.</param>
private static void SetFlatMatrixValue(double[] array, int arrayIdx, int width, int row, int col, double value)
{
    array[(2 * arrayIdx + row) * width + col] = value;
}

/// <summary>
/// Reads one cell from the flattened two-rows-per-slot matrix.
/// </summary>
/// <param name="array">Flat backing storage.</param>
/// <param name="arrayIdx">Slot index; each slot owns two consecutive rows.</param>
/// <param name="width">Number of columns per row.</param>
/// <param name="row">Row within the slot.</param>
/// <param name="col">Column within the row.</param>
/// <returns>The value stored at the addressed cell.</returns>
private static double GetFlatMatrixValue(double[] array, int arrayIdx, int width, int row, int col)
{
    // Offset of the first cell of the addressed row.
    int rowStart = (2 * arrayIdx + row) * width;
    return array[rowStart + col];
}

StackTrace:

System.Exception at Alea.CUDAInterop.cuSafeCall@2939.Invoke(String message)

at Alea.CUDAInterop.cuSafeCall(cudaError_enum result)

at A.cf5aded17df9f7cc4c132234dda010fa7.Copy@918-22.Invoke(Unit _arg9)

at Alea.Memory.Copy(FSharpOption`1 streamOpt, Memory src, IntPtr srcOffset, Memory dst, IntPtr dstOffset, FSharpOption`1 lengthOpt)

at Alea.Gpu.RawCopy(FSharpOption`1 streamOpt, Memory src, IntPtr srcOffset, Array dst, IntPtr dstOffset, FSharpOption`1 lengthOpt)

at Alea.Gpu.CopyToHost[T](T[] src)

at TimeSeriesSubsequencesJoin.BFParallel.Execute() in D:\Workspaces\Visual Studio\TimeSeriesSubsequencesJoin\TimeSeriesSubsequencesJoin\SourceCode\Algorithms\BFParallel.cs:line 113

0 个答案:

没有答案