嗨我有一个简单的使用纹理记忆计算。但我无法保存正确的结果。 结果应该是插值。例如,角度= 0.5 A [0] = 1,B [0] = 2,结果[0]应为1.5
我想我没有正确保存数据。我想使用纹理内存进行快速计算,并将结果保存在全局数组中。有些事情我做错了。有没有人有什么想法?
这是我在内核中的代码
#ifndef _SIMPLETEXTURE_KERNEL_H_
#define _SIMPLETEXTURE_KERNEL_H_
texture<float, 1> tex1;
texture<float, 1> tex2;
__global__ void
transformKernel( float* g_odata, float f)
{
// calculate normalized texture coordinates
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
int idx = threadIdx.x;
float valA = tex1D(tex1,x);
float valB = tex1D(tex2,x);
// read from texture and write to global memory
g_odata[x] = (f)*valA + (1-f)*valB;
}
#endif
这里是我调用的代码
#include <stdio.h>
#include <iostream>
#include "cuda.h"
#include <stdlib.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "HelloWorld.h"
#include "linearInterpolation_kernel.cu"
using namespace std;
using std::cout;
const int blocksize = 16;
int main()
{
int N = 1000;
float *A;
A = (float *) malloc(N*sizeof(float));
float *B;
B = (float *) malloc(N*sizeof(float));
float *result;
result = (float *) malloc(N*sizeof(float));
float angle = 0.5f;
for(int i = 0; i < N; i++){
A[i] = (float)rand();
B[i] = (float)rand();
}
cout << A[3] << endl;
cout << B[3] << endl;
ipLinearTexture(A,B,result,angle,N);
float result2;
result2 = (angle)*A[3] + (1-angle)*B[3];
printf(" A %f B %f Result %f\n", A[3], B[3], result[3]);
cout << result2 << endl;
return 1;
}
void ipLinearTexture(float *A, float* B, float* result, float angle, int N)
{
float cuTime;
const cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<float>();
cudaArray* cuda_A;
cudaArray* cuda_B;
float *dev_result;
cudaMallocArray(&cuda_A, &channel_desc, 1, N * sizeof(float));
cudaMallocArray(&cuda_B, &channel_desc, 1, N * sizeof(float));
cudaMalloc(&dev_result, N * sizeof(float));
cudaMemcpyToArray(cuda_A,0,0,A,N * sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpyToArray(cuda_B,0,0,B,N * sizeof(float),cudaMemcpyHostToDevice);
tex1.filterMode = cudaFilterModePoint;
tex1.addressMode[0] = cudaAddressModeWrap;
tex2.filterMode = cudaFilterModePoint;
tex2.addressMode[0] = cudaAddressModeWrap;
cudaBindTextureToArray(tex1, cuda_A, channel_desc);
cudaBindTextureToArray(tex2, cuda_B, channel_desc);
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventRecord(start,0);
transformKernel<<< 16, 16, 0 >>>(dev_result,angle);
cudaEventCreate(&stop);
cudaEventRecord(stop,0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&cuTime, start,stop);
cudaMemcpy(result, dev_result, N * sizeof(float), cudaMemcpyKind::cudaMemcpyDeviceToHost);
result[0] = (float)cuTime;
cudaFreeArray(cuda_A);
cudaFreeArray(cuda_B);
cudaFree(dev_result);
}
答案 0 :(得分:2)
根据编程指南和文档in this page中的示例,似乎cudaMallocArray函数的声明是:
cudaError_t cudaMallocArray(struct cudaArray **array,
const struct cudaChannelFormatDesc *desc,
size_t width,
size_t height = 0,
unsigned int flags = 0)
在您发布的代码中,您传递字节中的大小。请尝试使用N
,删除sizeof(float)
至少上次我使用纹理进行线性插值时,当我分配cudaArray
时,我没有指定字节大小而且它有效。
请注意,当您调用cudaMemcpyToArray
时,大小应以字节为单位。