Cuda写入设备上的数组不会改变值

时间:2019-03-26 14:05:10

标签: c++ cuda

我有一个1D float3像素数组,为了进行测试,我试图在设备上设置一个数组值。我没有收到错误,但是当我打印数组值时它显示为0。

这是我的设备代码。

// Question excerpt: test kernel meant to store one colour value per pixel.
// NOTE(review): the array parameter decays to float3**, so every element of
// pixeld_d is a float3* rather than a float3.
__global__ void addKernel(float3 *pixeld_d[])
{
        // NOTE(review): stores the address of the temporary returned by
        // make_float3(); the index uses W * blockIdx.x while the launch shown
        // in this post is <<<W, H>>>.
        pixeld_d[threadIdx.x + W *blockIdx.x] = &make_float3(255, 30, 123);
        // NOTE(review): ->x is a float but is printed with the %d conversion.
        printf("\n Block %d Thread %d Pixeld_d %d",blockIdx.x,threadIdx.x, pixeld_d[threadIdx.x + W * blockIdx.x]->x);
}

我的主机代码:

        // Host staging array of W*H float3 values; nothing writes to it before
        // it is copied below.
        float3* pixeld = new float3[W*H];
        // NOTE(review): this host array of pointers is lost as soon as
        // cudaMallocManaged overwrites pixeld_d on the next line.
        float3** pixeld_d = new float3*[W*H];
        // The allocation is sized for W*H float3 values although pixeld_d is
        // typed float3**; the (void **) cast hides that mismatch.
        status = cudaMallocManaged((void **)&pixeld_d,(W*H)*sizeof(float3));
        status = cudaMemcpy(pixeld_d,pixeld, (W*H) * sizeof(float3), cudaMemcpyHostToDevice);
        // Launches W blocks of H threads each.
        addKernel << <W,H >> > (pixeld_d);

在控制台中,我得到如下结果:

 Block 811 Thread 25 Pixeld_d 0

我希望Pixeld_d为255,但它为0。

这里是完整代码(所有被注释掉的代码之所以被注释,是因为我从函数调用中删除了一些参数,否则VS会报编译错误):

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <SFML/Graphics.hpp>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
// Wraps a CUDA runtime call (or any expression yielding cudaError_t) and
// reports a failure together with the file and line of the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Prints a diagnostic to stderr when `code` is not cudaSuccess.
//
//   code  - status returned by a CUDA runtime call
//   file  - source file of the call site (supplied by gpuErrchk)
//   line  - source line of the call site (supplied by gpuErrchk)
//   abort - when true (the default) the process exits with the CUDA error
//           code after the message has been printed; pass false to log the
//           error and keep running
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code != cudaSuccess)
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      // Honour the abort flag: continuing after a failed CUDA call only
      // produces follow-up errors that hide the original cause.
      if (abort) exit(static_cast<int>(code));
   }
}

#define W 960
#define H 540
int mov;
#define Vector3 float3
//,Sphere sphere,Sphere light
#pragma region MyRegion




// Device-side dot product of two 3-component vectors.
// The products and the sum are evaluated in single precision; the result is
// widened to double only when it is returned.
__device__ inline double dot(const Vector3& a, const Vector3& b)
{
    const float sum = a.x*b.x + a.y*b.y + a.z*b.z;
    return sum;
}


// Sphere stored as a reference point `c` and a radius `r`.
//
// Execution-space qualifiers belong on the member functions, not on the
// struct keyword: placed on the struct they do not make the constructor or
// getNormal() callable from device code.
struct Sphere
{
    Vector3 c;  // point that getNormal() measures offsets from (the centre)
    float r;    // radius; getNormal() divides by it, so it must be non-zero

    // Builds a sphere from its centre `i` and radius `j`.
    __host__ __device__ Sphere(Vector3 i, float j) : c(i), r(j) {}

    // Returns (pi - c) / r, i.e. the offset of `pi` from the centre scaled by
    // the inverse radius. The result has unit length only when `pi` lies on
    // the sphere surface.
    __host__ __device__ Vector3 getNormal(const Vector3& pi) const
    {
        return make_float3((pi.x - c.x) / r,
                           (pi.y - c.y) / r,
                           (pi.z - c.z) / r);
    }

};


// Integer RGB triple.
//
// The execution-space qualifiers are attached to the constructor; written in
// front of the struct keyword they do not apply to its members.
struct Color
{
    int r, g, b;

    // Converts the three float channels to int; the fractional part is
    // discarded (truncation toward zero), exactly as the implicit conversion
    // in a plain assignment would do.
    __host__ __device__ Color(float a, float e, float t)
        : r(static_cast<int>(a)), g(static_cast<int>(e)), b(static_cast<int>(t)) {}
};
#pragma endregion
// Fills the frame buffer with a constant test colour.
//
// Expected launch layout: one block per image row (gridDim.x == H) and one
// thread per column (blockDim.x == W), so the flat index computed below
// equals row * W + column. Threads whose index falls outside the W * H
// buffer do nothing. No shared memory is used.
//
//   pixeld_d - device-accessible buffer of at least W * H float3 elements;
//              main() passes a cudaMallocManaged allocation
__global__ void addKernel(float3 *pixeld_d)
{
        const int idx = blockIdx.x * blockDim.x + threadIdx.x;
        if (idx >= W * H)
                return;

        // Store the value itself; the buffer holds float3 elements, not
        // pointers to them.
        pixeld_d[idx] = make_float3(255.0f, 30.0f, 123.0f);

        // Debug output from a single thread only: device printf is serialised
        // and its buffer is limited, so printing from every thread is slow and
        // most of the output would be dropped. %f matches the float channel.
        if (idx == 0)
                printf("\n Block %u Thread %u Pixeld_d %f", blockIdx.x, threadIdx.x, pixeld_d[idx].x);

        // Disabled ray-tracing body, kept verbatim from the original listing.
        // It still refers to the removed `sphere` / `light` parameters and to
        // the old pointer-array form of pixeld_d, so it must be adapted to the
        // flat float3 buffer before it is re-enabled.
        /*
        float3 black = make_float3(0, 0, 0);
        float3 red = make_float3(255, 0, 0);
        float3 white = make_float3(255, 255, 255);
        pixeld_d[threadIdx.y] = &black;
        float3 o = make_float3(blockIdx.x, threadIdx.x, 0);
        float3 d = make_float3(0, 0, 1);
        double t = 20000;
        const Vector3 oc = make_float3(o.x - sphere.c.x, o.y - sphere.c.y, o.z - sphere.c.z);
        const double b = 2 * dot(oc, d);
        const double c = dot(oc, oc) - sphere.r * sphere.r;
        double disc = b * b - 4 * c;
        if (!disc < 1e-4)
        {
            disc = sqrt(disc);
            const double t0 = -b - disc;
            const double t1 = -b + disc;
            t = (t0 < t1) ? t0 : t1;
            Vector3 pi = make_float3(o.x + make_float3(d.x * t,d.y * t, d.z * t).x, o.y + make_float3(d.x * t, d.y * t, d.z * t).y,o.z + make_float3(d.x * t, d.y * t, d.z * t).z);
            Vector3 L = make_float3(light.c.x - pi.x, light.c.y - pi.y, light.c.z - pi.z);
            Vector3 N = make_float3(make_float3(pi.x - sphere.c.x, pi.y - sphere.c.y, pi.z - sphere.c.z).x / sphere.r, make_float3(pi.x - sphere.c.x, pi.y - sphere.c.y, pi.z - sphere.c.z).y / sphere.r, make_float3(pi.x - sphere.c.x, pi.y - sphere.c.y, pi.z - sphere.c.z).z / sphere.r);
            double mg = sqrt(L.x*L.x + L.y * L.y + L.z * L.z);
            float3 Lf = make_float3(L.x / mg, L.y / mg, L.z / mg);
            mg = sqrt(N.x*N.x + N.y * N.y + N.z * N.z);
            float3 Nf = make_float3(N.x / mg, N.y / mg, N.z / mg);
            float dt = dot(Lf,Nf);
            int r = (red.x + white.x * dt)*0.5;
            int g = (red.y + white.y * dt)*0.5;
            int b = (red.z + white.z * dt)*0.5;
            if (r < 0)
                r = 0;
            if (g < 0)
                g = 0;
            if (b < 0)
                b = 0;
            pixeld_d[threadIdx.y]->x = r;
            pixeld_d[threadIdx.y]->y = g;
            pixeld_d[threadIdx.y]->z = b;

        }
*/
}

// Opens a W x H window and, every frame, lets the GPU fill a managed frame
// buffer that is then copied pixel by pixel into an SFML image and drawn.
// Returns 0 on a normal exit and 1 when a CUDA call fails.
int main()
{
    sf::RenderWindow window(sf::VideoMode(W, H), "SFML works!");
    sf::Image image;
    image.create(W, H, sf::Color::Black);
    sf::Texture tex;
    sf::Sprite sprite;

    // All GPU allocations are made once, before the render loop, and released
    // once after it; allocating per frame leaked memory on every iteration.
    // `sphere` and `light` are not passed to the kernel yet (see the disabled
    // ray-tracing body in addKernel).
    Sphere *sphere = NULL;
    Sphere *light = NULL;
    // Managed memory is visible to both host and device, so no separate host
    // staging array and no cudaMemcpy are needed.
    float3 *pixeld_d = NULL;

    cudaError_t status = cudaMalloc((void **)&sphere, sizeof(Sphere));
    if (status == cudaSuccess)
        status = cudaMalloc((void **)&light, sizeof(Sphere));
    if (status == cudaSuccess)
        status = cudaMallocManaged((void **)&pixeld_d, (W*H)*sizeof(float3));
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA allocation failed: %s\n", cudaGetErrorString(status));
        cudaFree(pixeld_d);   // cudaFree accepts a null pointer
        cudaFree(light);
        cudaFree(sphere);
        return 1;
    }

    while (window.isOpen())
    {
        // Drain the event queue so the window stays responsive and the close
        // button actually ends the loop.
        sf::Event event;
        while (window.pollEvent(event))
        {
            if (event.type == sf::Event::Closed)
                window.close();
        }
        if (!window.isOpen())
            break;

        if (sf::Keyboard::isKeyPressed(sf::Keyboard::A))
        {
            mov -= 3;
        }
        if (sf::Keyboard::isKeyPressed(sf::Keyboard::D))
        {
            mov += 3;
        }
        window.clear();

        // H blocks (rows) of W threads (columns): the kernel's flat index is
        // then row * W + column and never exceeds W * H - 1.
        addKernel<<<H, W>>>(pixeld_d);
        status = cudaGetLastError();            // launch-configuration errors
        if (status == cudaSuccess)
            status = cudaDeviceSynchronize();   // execution errors; also makes
                                                // the managed buffer safe to
                                                // read on the host
        if (status != cudaSuccess)
        {
            fprintf(stderr, "addKernel failed: %s\n", cudaGetErrorString(status));
            break;
        }

        std::cout << pixeld_d[399359].x;

        for (int y = 0; y < H; y++)
        {
            for (int x = 0; x < W; x++)
            {
                // Row-major lookup matching the kernel's write index.
                const float3 &src = pixeld_d[y * W + x];
                sf::Color pixel;
                pixel.r = static_cast<sf::Uint8>(src.x);
                pixel.g = static_cast<sf::Uint8>(src.y);
                pixel.b = static_cast<sf::Uint8>(src.z);
                image.setPixel(x, y, pixel);
            }
        }
        tex.loadFromImage(image);
        sprite.setTexture(tex, true);
        window.draw(sprite);
        window.display();
    }

//,*sphere,*light

    cudaFree(pixeld_d);
    cudaFree(light);
    cudaFree(sphere);

    return status == cudaSuccess ? 0 : 1;
}
´´´

2 个答案:

答案 0 :(得分:4)

您的程序具有未定义的行为。由于数组退化(array decay),这

__global__ void addKernel(float3 *pixeld_d[])

等同于

__global__ void addKernel(float3 **pixeld_d)

因此,您已经声明内核函数将一个指向 float3 指针的指针作为输入参数。我在这里只是推测,但我想这很可能就是最初导致您为了让编译器不再报错并编译通过而引入下面所有问题的原因。您真正想写的是

__global__ void addKernel(float3 *pixeld_d)

即,向您的内核传递一个指向 float3 数组的指针,内核将结果写入该数组。

在主机端,您有 pixeld_d,它是一个指向 float3 指针数组的指针,并被初始化为指向一个动态分配的指针数组

float3** pixeld_d = new float3*[W*H];

我再次推测,但很可能您实际上希望它只是一个 float3*,只是编译器不允许您把它用作内核调用的参数。紧接着,您就用设备内存分配的结果覆盖了该指针,从而泄漏了先前分配的主机内存:

status = cudaMallocManaged((void **)&pixeld_d,(W*H)*sizeof(float3));

请注意,此处的类型不匹配。您分配的缓冲区是给 float3 数组用的(大概是您实际想要的),而不是给 float3* 数组用的,而后者才是您此时使用的类型所要求的。&pixeld_d 实际上是 float3***。因此,编译器本来会在这里发现您的错误,但是您用 C 风格强制转换迫使编译器闭嘴。这是您引发未定义行为的第一处。不幸的是,这种错误通常不会导致崩溃,您的程序只会继续看似正常地运行。

然后您继续运行内核,该内核将执行以下操作:

pixeld_d[threadIdx.x + W *blockIdx.x] = &make_float3(255, 30, 123);

在这里,您尝试将临时对象(make_float3() 的结果)的地址赋给数组的每个元素。我不确定您是如何让这段代码通过编译的,因为它不是合法的 C++,任何 C++ 编译器(包括 nvcc)都应拒绝编译它。即使您以某种方式让它通过了编译,这些临时对象也会在这一行结束时自动销毁,您得到的指针不再指向有效对象。我再次推测,但我认为这样做同样是为了让编译器不再因类型不匹配而报错。pixeld_d[i] 实际上是 float3* 而不是 float3,因为您在此处使用的指针类型与您实际想要使用的缓冲区类型不匹配。

这个故事的教训:不要为了让编译器不再报错就对代码进行随意更改。请尝试理解它为什么拒绝编译您的代码。通常,原因是您试图做一些没有意义的事情。只有在理解了问题所在以及正确的解决方法之后再更改代码……并且不要在 C++ 中使用 C 风格强制转换……

答案 1 :(得分:0)

我必须删除__global__ void addKernel(float3 *pixeld_d[])中的*并删除make_float3前面的&