Question

我使用NPP编写了一个BoxFilter示例，但输出图像看起来很破碎。这是我的代码：

#include <stdio.h>
#include <string.h>

#include <ImagesCPU.h>
#include <ImagesNPP.h>
#include <Exceptions.h>

#include <npp.h>
#include "utils.h"


// Runs a 3x3 integer filter (all-ones kernel, divisor 9) over a 4-channel
// 8-bit image with nppiFilter_8u_C4R and copies the result back into `data`.
//
//   data   - host buffer; width * height * 4 bytes are read from it and the
//            same number of bytes are written back to it at the end
//   width  - image width in pixels
//   height - image height in pixels
//
// NOTE(review): the destination images are allocated at the reduced ROI size
// (width - 2) x (height - 2), yet the final memcpy copies width * height * 4
// bytes out of the host result image. See the notes at the bottom.
void boxfilter1_transform( Npp8u *data, int width, int height ){
    // total byte count of the caller's buffer: 4 bytes per pixel
    size_t size = width * height * 4;

    // declare a host image object for an 8-bit RGBA image
    npp::ImageCPU_8u_C4 oHostSrc(width, height);

    // flat copy of the caller's pixels into the host image
    // (assumes the host image rows are tightly packed, i.e. pitch == width * 4 -- TODO confirm)
    Npp8u *nDstData = oHostSrc.data();
    memcpy(nDstData, data, size * sizeof(Npp8u));

    // declare a device image and copy construct from the host image,
    // i.e. upload host to device
    npp::ImageNPP_8u_C4 oDeviceSrc(oHostSrc);

    // create struct with box-filter mask size
    NppiSize oMaskSize = {3, 3};

    // Host-side kernel coefficients and the device pointer that will hold them
    Npp32s hostKernel[9] = {1, 1, 1, 1, 1, 1, 1, 1, 1};
    Npp32s *pKernel;

    // allocate the device kernel buffer and upload the coefficients
    // NOTE(review): no matching cudaFree(pKernel) appears in this function
    checkCudaErrors( cudaMalloc((void**)&pKernel, oMaskSize.width * oMaskSize.height * sizeof(Npp32s)) );
    checkCudaErrors( cudaMemcpy(pKernel, hostKernel, oMaskSize.width * oMaskSize.height * sizeof(Npp32s),
                                cudaMemcpyHostToDevice) );

    // divisor passed to the filter; 9 matches the sum of the all-ones 3x3 kernel
    Npp32s nDivisor = 9;

    // create struct with ROI size given the current mask
    // (source size minus mask size plus one in each dimension)
    NppiSize oSizeROI = {oDeviceSrc.width() - oMaskSize.width + 1, oDeviceSrc.height() - oMaskSize.height + 1};
    // allocate device image of appropriately reduced size
    npp::ImageNPP_8u_C4 oDeviceDst(oSizeROI.width, oSizeROI.height);
    // set anchor point inside the mask
    NppiPoint oAnchor = {2, 2};

    // run the filter; source and destination are each passed with their own pitch
    NppStatus eStatusNPP;
    eStatusNPP = nppiFilter_8u_C4R(oDeviceSrc.data(), oDeviceSrc.pitch(),
                                   oDeviceDst.data(), oDeviceDst.pitch(),
                                   oSizeROI, pKernel, oMaskSize, oAnchor, nDivisor);
    //printf("NppiFilter error status %d\n", eStatusNPP);
    NPP_DEBUG_ASSERT(NPP_NO_ERROR == eStatusNPP);

    // declare a host image for the result (same reduced size as oDeviceDst)
    npp::ImageCPU_8u_C4 oHostDst(oDeviceDst.size());
    // and copy the device result data into it, using the host image's pitch
    oDeviceDst.copyTo(oHostDst.data(), oHostDst.pitch());
    // NOTE(review): this flat copy moves `size` (= width * height * 4) bytes, but
    // oHostDst was created with the reduced ROI size, so its rows are shorter than
    // the rows of `data` and it holds fewer than `size` bytes. The copy neither
    // accounts for the differing row lengths nor stays within oHostDst.
    memcpy(data, oHostDst.data(), size * sizeof(Npp8u));

    return;
}

大部分代码都是从示例boxFilterNPP.cpp复制而来的。输出图像：http://img153.imageshack.us/img153/7716/o8z.png

为什么会这样？

Answer 1

你遇到的是行跨距（stride，即pitch）问题。改变这一行：

npp::ImageCPU_8u_C4 oHostDst(oDeviceDst.size());

改为：

npp::ImageCPU_8u_C4 oHostDst(oDeviceSrc.size());

发生了什么事？

我们假设您的输入图像是600x450。

oHostSrc为600 x 450，间距为600x4 = 2400。
从data到oHostSrc的memcpy没有问题，因为它们的宽度和间距（pitch）相同。
oDeviceSrc从oHostSrc获得尺寸（600x450）。
oDeviceDst比oDeviceSrc稍小，因为它只获得ROI的大小，所以它大约是596x446。
您的代码创建的oHostDst与oDeviceDst的大小相同，因此也约为596x446。
.copyTo操作将oDeviceDst中（带间距的）596x446图像复制到（不带间距的）oHostDst，大小同样是596x446。
最终的memcpy会破坏图片，因为它正在将596x446的oHostDst图像复制到600x450的data区域。

解决方案是将oHostDst创建为600x450，并让.copyTo操作处理行大小和间距（pitch）的差异。

原始代码没有此问题，因为该代码中的任何位置都没有不考虑间距（pitch）的复制（例如，没有使用原始的memcpy）。只要您在每个复制步骤中明确处理源和目标的间距和宽度，无论您是将最终图像创建为600x450还是596x446都无关紧要。但是您最终的memcpy操作并未明确处理间距和宽度，而是隐含地假设源和目标都具有相同的大小，而事实并非如此。

nppiFilter打破输出图像

1 个答案: