Question

此代码应该使用几何布朗运动方法生成库存路径。对于每条路径，我都有10个步骤，但是，正如您在下面看到的那样，从第三条路径开始，所有内容都是0，这不是我想要的。错误在哪里？

#include <iostream>
#include <iomanip>

#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <curand_kernel.h>

const int numSims(10);
const int threadBlockSize(4);

__global__ void generatePaths(float* path)
{
    float r=0.1;
    float sigma=0.3;
    float S0=100;
    float K=100;
    float t=1;

    int steps=10;
    float dt=t/float(steps);

    curandState s;
    int tid=blockIdx.x*blockDim.x+threadIdx.x;
    curand_init(tid, 0, 0, &s);

    float *value=path+tid;
    float S=S0;
    for(unsigned int i=0; i<steps; ++i)
    {
        S=S*expf((r-0.5*sigma*sigma)*dt+sigma*sqrt(dt)*curand_normal(&s));
        *(path+tid+i)=S;
    }
}

int main()
{
    dim3 grid;
    dim3 block;
    block.x=threadBlockSize;
    grid.x=(numSims+threadBlockSize-1)/threadBlockSize;

    int steps=10;
    float *da;
    cudaMalloc((void**)&da, numSims*steps*sizeof(float));
    generatePaths<<<grid, block>>>(da);

    float *values;
    values=(float*)malloc(numSims*steps*sizeof(float));
    cudaMemcpy(values, da, numSims*steps*sizeof(float), cudaMemcpyDeviceToHost);

    for(int i=0; i<numSims; i++)
    {
        for(int j=0; j<steps; j++)
        {
            std::cout<<values[i*steps+j]<<" ";
        }
        std::cout<<std::endl;
    }
    return 0;
}

结果是

103.381 97.1031 106.928 114.18 120.802 98.2669 114.038 106.057 126.741 136.836

125.589 124.903 123.564 102.781 125.09 71.5134 89.9109 92.4751 184.371 101.023

162.256 0 0 0 0 0 0 0 0 0

0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 按任意键继续。。

Answer 1

不应该是

    *(path + tid * steps + i) = S;

而不是

    *(path+tid+i)=S;

您的版本具有竞争条件 - 不同的线程正在写入相同的内存元素

UPD：对，@talonmies指出坏内存访问是有效的 - 你的网格每个块有3个块和4个线程，每个处理steps个元素，但是分配的内存的大小较小。您可以将大小传递给内核并添加一个检查if(tid > size) return;，或更改您的网格以更好地适应任务。

使用几何布朗运动方法调试生成库存路径的CUDA代码

1 个答案: